This tutorial covers the basic population genetics analyses
for a set of populations (or closely related species)
1) Evaluating, filtering and plotting the genetic data
2) Test for Hardy-Weinberg-Equilibrium (HWE) and linkage between
loci
3) Calculating, assessing and plotting genotypic richness and diversity,
heterozygosity, F-statistics, and isolation-by-distance (IBD)
4) Performing and plotting principal component analyses (PCA) and
discriminant analyses of principal components (DAPC)
5) Creating maps based on the DAPC results
As input is required (see Import Data paragraph
below)
1) a vcf file with the genetic data
2) a dataset containing individual IDs, longitude, latitude and
population/species assignment for each sample/individual
The tutorial uses an example dataset of unpublished ddRAD data (164 individiduals of 8 species) from the Hemileuca maia buck moth species group (Saturniidae: Lepidoptera) distributed throughout the US and southern Canada.
install_if_missing <- function(pkg) { #create function to check and install required packages
if (!require(pkg, character.only = T)) {
install.packages(pkg, dependencies = T)
library(pkg, character.only = T)}}
install_if_missing("ggplot2")
install_if_missing("viridis")
install_if_missing("dplyr")
install_if_missing("adegenet")
install_if_missing("plotly")
install_if_missing("hierfstat")
install_if_missing("pegas")
install_if_missing("vcfR")
install_if_missing("poppr")
install_if_missing("reshape2")
install_if_missing("geosphere")
install_if_missing("stringr")
install_if_missing("mmod")
rm(list = ls()) #clear environment
setwd("C:/Users/danie/Desktop/PhD research/Pop data/") #set working directory (containing vcf and datafile)
dataset <- read.table(file = "C:/Users/danie/Desktop/PhD research/Pop data/dataset.txt", sep = "\t", header = T, stringsAsFactors = T) #import dataset containing Individual ID, Species, longitude and Latitude
pop_dataset <- vcfR2genind(read.vcfR("C:/Users/danie/Desktop/PhD research/Pop data/pop_dataset.vcf")) #import vcf files and convert it to genind object
population_assignment <- dataset$Species #set population assignment
population_assignment <- factor(population_assignment, levels = unique(population_assignment))
pop_dataset$pop <- droplevels(as.factor(population_assignment))
indNames(pop_dataset) <- dataset$ID #assign individual names using ID label
head(dataset) #check dataset
## ID Latitude Longitude Species
## 1 H.m.sandra_AL_Hmg344 33.44000 -86.61000 H.maia
## 2 H.m.sandra_AL_Hmg353 33.44000 -86.61000 H.maia
## 3 H.m.sandra_AL_Hmg358 33.44000 -86.61000 H.maia
## 4 H.latifascia_IA_Hmg315 41.92489 -95.99636 H.latifascia
## 5 H.latifascia_IA_Hmg321 41.92489 -95.99636 H.latifascia
## 6 H.m.sandra_KS_Hmg015 39.09000 -96.58000 H.maia
unique(pop_dataset$pop) #show populations
## [1] H.maia H.latifascia H.lucina
## [4] H.maia.x.lucina H.nevadensis H.sp.GreatLakesComplex
## [7] H.slosseri H.peigleri
## 8 Levels: H.maia H.latifascia H.lucina H.maia.x.lucina ... H.peigleri
nLoc(pop_dataset) #show number of loci
## [1] 549
poppr(pop_dataset) #show basic summary table, including number of individuals (N) and genotypes (MLG) per population, Shannon-Wiener index (H; Shannon 2001, genotype diversity), Stoddart and Taylor's (1988) G index (G, genotype diversity) and Nei’s unbiased gene diversity (Nei 1978; Hexp)
## Pop N MLG eMLG SE H G lambda E.5 Hexp
## 1 H.maia 75 75 10 0.00e+00 4.32 75 0.987 1 0.008531
## 2 H.latifascia 15 15 10 2.77e-07 2.71 15 0.933 1 0.001636
## 3 H.lucina 10 10 10 0.00e+00 2.30 10 0.900 1 0.000639
## 4 H.maia.x.lucina 1 1 1 0.00e+00 0.00 1 0.000 NaN 0.000000
## 5 H.nevadensis 22 22 10 0.00e+00 3.09 22 0.955 1 0.000351
## 6 H.sp.GreatLakesComplex 10 10 10 0.00e+00 2.30 10 0.900 1 0.000000
## 7 H.slosseri 19 19 10 2.51e-07 2.94 19 0.947 1 0.000968
## 8 H.peigleri 12 12 10 0.00e+00 2.48 12 0.917 1 0.003906
## 9 Total 164 164 10 0.00e+00 5.10 164 0.994 1 0.028232
## Ia rbarD File
## 1 3.95 0.0153 pop_dataset
## 2 3.32 0.0253 pop_dataset
## 3 2.54 0.0352 pop_dataset
## 4 NA NA pop_dataset
## 5 2.59 0.0642 pop_dataset
## 6 2.85 0.0275 pop_dataset
## 7 2.98 0.0227 pop_dataset
## 8 3.14 0.0258 pop_dataset
## 9 3.57 0.0103 pop_dataset
private_alleles(pop_dataset) %>% apply(MARGIN = 1, FUN = sum) #show number of private alleles (alleles found exclusively in one population) per site across all loci for each population
## H.maia H.latifascia H.lucina
## 473 74 31
## H.maia.x.lucina H.nevadensis H.sp.GreatLakesComplex
## 0 241 44
## H.slosseri H.peigleri
## 69 29
missing_data <- poppr::info_table(pop_dataset, type = "missing", plot = T) #plot missing data across loci and populations
round(missing_data[1:length(unique(pop_dataset$pop)), 1:4], 2) #show proportion of missing data across populations for first four loci
## Locus
## Population Chromosome24_358443 Chromosome24_358452
## H.maia 0.08 0.05
## H.latifascia 0.07 0.07
## H.lucina 0.20 0.20
## H.maia.x.lucina 1.00 1.00
## H.nevadensis 0.09 0.09
## H.sp.GreatLakesComplex 0.30 0.30
## H.slosseri 0.11 0.11
## H.peigleri 0.08 0.08
## Locus
## Population Chromosome24_358457 Chromosome24_358458
## H.maia 0.05 0.08
## H.latifascia 0.07 0.07
## H.lucina 0.20 0.20
## H.maia.x.lucina 1.00 1.00
## H.nevadensis 0.09 0.09
## H.sp.GreatLakesComplex 0.30 0.30
## H.slosseri 0.11 0.11
## H.peigleri 0.08 0.08
pop_dataset <- poppr::informloci(pop_dataset,
MAF = 0.01, #only retain loci with at least 1% of the minor allele (minor allele frequency cutoff to 5%)
cutoff = 2 / nInd(pop_dataset)) #only loci with at least (2 / total number of individuals) differentiating genotypes are retained (a locus must have at least (2 / total number of individuals) of its genotypes be different from each other to be considered useful and being retained to ensure that only loci with enough variation are kept)
## cutoff value: 1.21951219512195 % ( 2 samples ).
## MAF : 0.01
##
## Found 283 uninformative loci
## ============================
## 0 loci found with a cutoff of 2 samples
## 283 loci found with MAF < 0.01 :
## Chromosome24_358457, Chromosome24_358480, Chromosome24_358514,
## Chromosome29_216196, Chromosome29_216204, Chromosome29_216217,
## Chromosome29_216223, Chromosome29_216254, Chromosome30i_1475928,
## Chromosome30i_1475944, Chromosome30i_1476002, Chromosome30i_2476518,
## Chromosome30i_2476546, ctg00000001_1_7828523, ctg00000001_1_7828537,
## ctg00000001_1_8964629, ctg00000001_1_9453151, ctg00000001_1_9453152,
## ctg00000001_1_9453153, ctg00000001_1_9453217, ctg00000001_1_9453218,
## ctg00000001_1_10554521, ctg00000001_1_10554544, ctg00000001_1_12408856,
## ctg00000001_1_12408858, ctg00000001_1_12408871, ctg00000001_1_12416218,
## ctg00000001_1_12416246, ctg00000001_1_14226152, ctg00000001_1_14226160,
## ctg00000001_1_14226167, ctg00000001_1_14226184, ctg00000002_1_849307,
## ctg00000002_1_849313, ctg00000002_1_849315, ctg00000002_1_849338,
## ctg00000002_1_849341, ctg00000002_1_4063952, ctg00000002_1_4063959,
## ctg00000002_1_4063965, ctg00000002_1_4063968, ctg00000002_1_4063978,
## ctg00000002_1_4063989, ctg00000003_1_1051579, ctg00000003_1_1051586,
## ctg00000003_1_1051639, ctg00000003_1_1051658, ctg00000003_1_2424154,
## ctg00000003_1_2424211, ctg00000003_1_2424227, ctg00000003_1_2808819,
## ctg00000003_1_2808825, ctg00000003_1_2808888, ctg00000003_1_2808891,
## ctg00000003_1_2808897, ctg00000003_1_4110209, ctg00000003_1_4110212,
## ctg00000003_1_4110248, ctg00000003_1_4904340, ctg00000003_1_4904341,
## ctg00000003_1_4904342, ctg00000003_1_4904347, ctg00000003_1_4904381,
## ctg00000003_1_7667629, ctg00000003_1_7667678, ctg00000003_1_7667703,
## ctg00000003_1_10993711, ctg00000003_1_10993746, ctg00000003_1_10993752,
## ctg00000003_1_10993758, ctg00000004_1_6353370, ctg00000004_1_6353382,
## ctg00000004_1_6353383, ctg00000004_1_6353415, ctg00000004_1_8507507,
## ctg00000004_1_8507523, ctg00000004_1_8507536, ctg00000004_1_8507549,
## ctg00000004_1_8507566, ctg00000004_1_17062426, ctg00000004_1_17062461,
## ctg00000004_1_17062467, ctg00000005_1_39788, ctg00000005_1_39819,
## ctg00000005_1_39825, ctg00000005_1_39851, ctg00000005_1_2854486,
## ctg00000005_1_2854504, ctg00000005_1_2854560, ctg00000005_1_12161398,
## ctg00000005_1_12161428, ctg00000005_1_13627665, ctg00000005_1_13775183,
## ctg00000005_1_13775215, ctg00000005_1_13775226, ctg00000005_1_16183129,
## ctg00000005_1_16183132, ctg00000005_1_16183163, ctg00000005_1_16183216,
## ctg00000005_1_16585441, ctg00000005_1_16762194, ctg00000005_1_16762196,
## ctg00000005_1_16762204, ctg00000005_1_16762221, ctg00000005_1_16762237,
## ctg00000005_1_16762239, ctg00000005_1_16762272, ctg00000005_1_16762275,
## ctg00000005_1_16762281, ctg00000006_1_10939176, ctg00000006_1_10939207,
## ctg00000006_1_11253046, ctg00000006_1_17071611, ctg00000006_1_17071687,
## ctg00000007_1_880357, ctg00000007_1_880399, ctg00000007_1_880407,
## ctg00000007_1_880409, ctg00000007_1_880411, ctg00000007_1_880417,
## ctg00000007_1_880419, ctg00000007_1_2340133, ctg00000007_1_2340174,
## ctg00000007_1_2340176, ctg00000007_1_2340178, ctg00000007_1_2340179,
## ctg00000007_1_4686069, ctg00000007_1_9432358, ctg00000007_1_9432369,
## ctg00000007_1_9432370, ctg00000007_1_9432382, ctg00000008_1_1329324,
## ctg00000008_1_1329325, ctg00000008_1_1329373, ctg00000008_1_1329381,
## ctg00000008_1_12670593, ctg00000008_1_12670604, ctg00000008_1_12670614,
## ctg00000008_1_12670616, ctg00000008_1_14150999, ctg00000008_1_14151004,
## ctg00000008_1_14693202, ctg00000008_1_14693220, ctg00000008_1_14693228,
## ctg00000008_1_14693229, ctg00000008_1_14693264, ctg00000009_1_2742839,
## ctg00000009_1_2742850, ctg00000009_1_2742854, ctg00000009_1_2742868,
## ctg00000010_1_2259213, ctg00000010_1_2259232, ctg00000010_1_2259287,
## ctg00000010_1_3393151, ctg00000010_1_3393188, ctg00000010_1_3393199,
## ctg00000011_1_1511900, ctg00000011_1_1511916, ctg00000011_1_1511963,
## ctg00000011_1_15160649, ctg00000011_1_15160659, ctg00000011_1_15160692,
## ctg00000012_1_377654, ctg00000012_1_377672, ctg00000012_1_377698,
## ctg00000012_1_377699, ctg00000012_1_839965, ctg00000012_1_839974,
## ctg00000012_1_3950277, ctg00000012_1_13095605, ctg00000012_1_13095621,
## ctg00000012_1_13095632, ctg00000012_1_13095677, ctg00000012_1_13095689,
## ctg00000013_1_1138550, ctg00000013_1_1138570, ctg00000013_1_1138589,
## ctg00000013_1_1138599, ctg00000013_1_1138608, ctg00000013_1_1138613,
## ctg00000013_1_1138616, ctg00000013_1_1545622, ctg00000013_1_1545627,
## ctg00000013_1_1545675, ctg00000013_1_6142190, ctg00000013_1_8398605,
## ctg00000013_1_8398611, ctg00000013_1_8398633, ctg00000013_1_8398640,
## ctg00000013_1_8398652, ctg00000013_1_8398654, ctg00000013_1_8398661,
## ctg00000013_1_14156355, ctg00000018_1_4211436, ctg00000018_1_4211482,
## ctg00000018_1_4211483, ctg00000018_1_4211495, ctg00000018_1_4211510,
## ctg00000018_1_9034832, ctg00000018_1_9034843, ctg00000018_1_9034865,
## ctg00000018_1_9034878, ctg00000018_1_9034884, ctg00000018_1_10289957,
## ctg00000018_1_10748167, ctg00000018_1_10748198, ctg00000018_1_10748212,
## ctg00000018_1_10748215, ctg00000018_1_10748221, ctg00000018_1_10958065,
## ctg00000018_1_10958078, ctg00000018_1_12473010, ctg00000019_1_6928392,
## ctg00000019_1_6928396, ctg00000019_1_6928397, ctg00000019_1_6928399,
## ctg00000019_1_6928401, ctg00000019_1_6928411, ctg00000019_1_12304438,
## ctg00000019_1_12304439, ctg00000019_1_12304447, ctg00000019_1_12304491,
## ctg00000019_1_12304498, ctg00000020_1_5092275, ctg00000020_1_5092296,
## ctg00000020_1_5092311, ctg00000020_1_5092314, ctg00000020_1_5092336,
## ctg00000020_1_11841819, ctg00000020_1_11841837, ctg00000020_1_11841861,
## ctg00000021_1_11026161, ctg00000021_1_11026203, ctg00000022_1_4076414,
## ctg00000022_1_4076432, ctg00000022_1_4076444, ctg00000022_1_4076448,
## ctg00000022_1_4076453, ctg00000022_1_9093691, ctg00000022_1_9093733,
## ctg00000022_1_9093735, ctg00000023_1_8781750, ctg00000023_1_8781762,
## ctg00000023_1_8781765, ctg00000023_1_8781807, ctg00000023_1_8781813,
## ctg00000027_1_1234792, ctg00000027_1_1234800, ctg00000027_1_1234804,
## ctg00000027_1_1234807, ctg00000027_1_1234808, ctg00000027_1_1234811,
## ctg00000027_1_6873532, ctg00000027_1_6873559, ctg00000027_1_6873581,
## ctg00000028_1_7039670, ctg00000028_1_7039685, ctg00000028_1_7039704,
## ctg00000028_1_7039715, ctg00000028_1_7039740, ctg00000029_1_645349,
## ctg00000029_1_645366, ctg00000029_1_2812162, ctg00000029_1_2812214,
## ctg00000029_1_2812231, ctg00000029_1_2812241, ctg00000029_1_2812243,
## ctg00000029_1_2812248, ctg00000029_1_4340856, ctg00000029_1_4340866,
## ctg00000029_1_4340916, ctg00000029_1_4340931, ctg00000034_1_3304170,
## ctg00000034_1_3304171, ctg00000034_1_3304195, ctg00000034_1_3304216,
## locus_619310_20, locus_619310_26, locus_619310_45, locus_619310_55,
## locus_619310_82, locus_619310_87, locus_619310_90
pop_dataset <- pop_dataset %>% missingno("loci", cutoff = 0.25) #remove loci with average missing data higher than 25%
##
## Found 15851 missing values.
##
## 2 loci contained missing values greater than 25%
##
## Removing 2 loci: ctg00000009_1_2742855, ctg00000011_1_1511894
#pop_dataset <- pop_dataset %>% missingno("geno", cutoff = 0.3) #remove individuals with more than 30% missing genotypes
poppr(pop_dataset, quiet = T) #show basic summary table, including number of individuals (N) and genotypes (MLG) per population, Shannon-Wiener index (H; genotype diversity), Stoddart and Taylor's G index (G, genotype diversity) and expected heterozygosity (Hexp) per population
## Pop N MLG eMLG SE H G lambda E.5 Hexp
## 1 H.maia 75 75 10 0.00e+00 4.32 75 0.987 1 0.010682
## 2 H.latifascia 15 15 10 2.77e-07 2.71 15 0.933 1 0.003361
## 3 H.lucina 10 10 10 0.00e+00 2.30 10 0.900 1 0.001372
## 4 H.maia.x.lucina 1 1 1 0.00e+00 0.00 1 0.000 NaN 0.000000
## 5 H.nevadensis 22 22 10 0.00e+00 3.09 22 0.955 1 0.000723
## 6 H.sp.GreatLakesComplex 10 10 10 0.00e+00 2.30 10 0.900 1 0.000000
## 7 H.slosseri 19 19 10 2.51e-07 2.94 19 0.947 1 0.001984
## 8 H.peigleri 12 12 10 0.00e+00 2.48 12 0.917 1 0.008333
## 9 Total 164 164 10 0.00e+00 5.10 164 0.994 1 0.020394
## Ia rbarD File
## 1 3.98 0.0235 pop_dataset
## 2 3.08 0.0324 pop_dataset
## 3 2.45 0.0381 pop_dataset
## 4 NA NA pop_dataset
## 5 2.31 0.0744 pop_dataset
## 6 3.23 0.0417 pop_dataset
## 7 2.90 0.0283 pop_dataset
## 8 3.35 0.0334 pop_dataset
## 9 3.73 0.0176 pop_dataset
missing_data <- poppr::info_table(pop_dataset, type = "missing", plot = T) #plot missing data across loci and populations
round(missing_data[1:length(unique(pop_dataset$pop)), 1:4], 2) #show proportion of missing data across populations for first four loci
## Locus
## Population Chromosome24_358443 Chromosome24_358452
## H.maia 0.08 0.05
## H.latifascia 0.07 0.07
## H.lucina 0.20 0.20
## H.maia.x.lucina 1.00 1.00
## H.nevadensis 0.09 0.09
## H.sp.GreatLakesComplex 0.30 0.30
## H.slosseri 0.11 0.11
## H.peigleri 0.08 0.08
## Locus
## Population Chromosome24_358458 Chromosome24_358474
## H.maia 0.08 0.05
## H.latifascia 0.07 0.07
## H.lucina 0.20 0.20
## H.maia.x.lucina 1.00 1.00
## H.nevadensis 0.09 0.09
## H.sp.GreatLakesComplex 0.30 0.30
## H.slosseri 0.11 0.11
## H.peigleri 0.08 0.08
nLoc(pop_dataset) #show number of loci
## [1] 264
private_alleles(pop_dataset) %>% apply(MARGIN = 1, FUN = sum) #show number of private alleles (alleles found exclusively in one population) per site across all loci for each population
## H.maia H.latifascia H.lucina
## 289 25 17
## H.maia.x.lucina H.nevadensis H.sp.GreatLakesComplex
## 0 222 11
## H.slosseri H.peigleri
## 36 7
plot(summary(pop_dataset)$n.by.pop, summary(pop_dataset)$pop.n.all,
xlab = "Number of indiduals", ylab = "Number of alleles",
main = "Alleles numbers and sample sizes", type = "n")
text(summary(pop_dataset)$n.by.pop, summary(pop_dataset)$pop.n.all,
lab = names(summary(pop_dataset)$n.by.pop))
barplot(summary(pop_dataset)$Hexp - summary(pop_dataset)$Hobs,
main = "Heterozygosity: expected-observed", ylab = "Hexp - Hobs")
barplot(summary(pop_dataset)$n.by.pop,
main = "Sample sizes per population", ylab = "Number of genotypes", las = 3)
tail(sort(round(summary(pop_dataset)$Hobs, 2))) #show loci with highest observed heterozygosity
## ctg00000004_1_8507570 ctg00000007_1_4686089 ctg00000004_1_8507573
## 0.33 0.34 0.35
## ctg00000018_1_4211457 ctg00000019_1_6928341 ctg00000029_1_4340858
## 0.35 0.35 0.39
Diversity indices incorporate both genotypic richness and abundance.
round(((poppr(pop_dataset))$eMLG), 2) #show genotypic richness accounting for sample size differences (eMLG) via rarefaction
## [1] 10 10 10 1 10 10 10 10 10
round(((poppr(pop_dataset))$lambda), 2) #show Simpson's index lambda (Simpson 1949; measure of genotypic diversity: estimation of probability that two randomly selected genotypes are different scaling from 0 with no genotypes are different to 1 so that all genotypes are different)
## [1] 0.99 0.93 0.90 0.00 0.95 0.90 0.95 0.92 0.99
Corr_Simp_ind <- round((((poppr(pop_dataset, quiet = T))$N / ((poppr(pop_dataset, quiet = T))$N - 1)) * (poppr(pop_dataset, quiet = T))$lambda), 2) #calculate sample-size-corrected Simpson's index
data.frame(Population = levels(pop_dataset$pop), Value = Corr_Simp_ind[1:length(levels(pop_dataset$pop))], stringsAsFactors = F) #print corrected Simpson's index
## Population Value
## 1 H.maia 1
## 2 H.latifascia 1
## 3 H.lucina 1
## 4 H.maia.x.lucina NaN
## 5 H.nevadensis 1
## 6 H.sp.GreatLakesComplex 1
## 7 H.slosseri 1
## 8 H.peigleri 1
Eveness is a measure of distribution of genotype abundances so that a population with equally abundant genotypes yields value equal to 1 and a population dominated by single genotype is closer to zero.
round(((poppr(pop_dataset))$E.5), 2) #show evenness E5 (Pielou 1975, Ludwig & Reynolds 1988, GrĂĽnwald et al. 2003)
## [1] 1 1 1 NaN 1 1 1 1 1
A significant p-value indicates lower observed heterozygosity
if ((bartlett.test(list(summary(pop_dataset)$Hexp, summary(pop_dataset)$Hobs)))$p.value < 0.05) { #test for homogeneity of variances using Bartlett test
var_equal <- T #variances are significantly different
} else {var_equal <- F} #variances are not significantly different
t.test(summary(pop_dataset)$Hexp, summary(pop_dataset)$Hobs, #perform t-test
paired = T, var.equal = var_equal, alternative = "greater")
##
## Paired t-test
##
## data: summary(pop_dataset)$Hexp and summary(pop_dataset)$Hobs
## t = 11.445, df = 263, p-value < 2.2e-16
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
## 0.04159992 Inf
## sample estimates:
## mean difference
## 0.04861071
hwe_results <- lapply(seppop(pop_dataset), hw.test, B = 0)
## Warning in hw.test.loci(x = x, B = B, ...): The following loci were ignored: Chromosome24_358443
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358452
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358458
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358474
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: Chromosome24_358511
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828507
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828542
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_7828546
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453136
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453145
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453161
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_9453188
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408829
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408841
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408872
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408878
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12408879
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416200
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416221
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416241
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416260
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000001_1_12416269
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000003_1_2808866
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000003_1_2808894
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39811
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39837
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39838
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39844
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_39863
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627657
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627661
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627687
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627702
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627734
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13627737
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_13775219
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183149
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183220
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000005_1_16183223
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071610
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071669
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071679
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000006_1_17071700
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_2340206
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_9432361
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000007_1_9432417
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_1329350
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_1329379
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_14693223
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000008_1_14693269
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377648
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377650
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377659
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377687
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000012_1_377693
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000013_1_6142160
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211444
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211457
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211466
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211478
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_4211511
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748197
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748239
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748240
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_10748245
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_12472987
## (not the same ploidy for all individuals, or too many missing data)The following loci were ignored: ctg00000018_1_12473001
## (not the same ploidy for all individuals, or too many missing data)The follo
hwe_pvalues_df <- melt(sapply(hwe_results, "[", i = T, j = 3)) #extract p-values from each population's test results and convert matrix to data frame for ggplot
colnames(hwe_pvalues_df) <- c("Locus", "Population", "P_value")
hwe_pvalues_df$P_value_adj <- p.adjust(hwe_pvalues_df$P_value, method = "fdr") #perform FDR correction
hwe_pvalues_df$Significant <- hwe_pvalues_df$P_value_adj < 0.05 #create significance column based on adjusted p-values
ggplot(hwe_pvalues_df, aes(x = Locus, y = Population, fill = P_value)) + #plot full heatmap of P-values
geom_tile(color = "white") +
scale_fill_viridis_c(option = "mako", name = "P-value") +
labs(x = "Locus", y = "Population",
title = "HWE p-values across loci and populations") +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.margin = margin(0, 0, 0, 0))
ggplot(hwe_pvalues_df, aes(x = Locus, y = Population)) + #plot significant p-values
geom_tile(aes(fill = Significant), color = "white") + #highlight only significant p-values
scale_fill_manual(values = c("white", "red"), name = "Significant") + #grey for non-significant, red for significant
labs(x = "Locus", y = "Population",
title = "Significant HWE p-values (FDR Corrected)") +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
plot.title = element_text(hjust = 0.5),
plot.margin = margin(0, 0, 0, 0))
calculate_heterozygosity <- function(genind_data) { #create function to calculate gene diversity within each population
if (!inherits(genind_data, "genind")) {
stop("Input must be a genind object")}
populations <- levels(pop(genind_data))
observed_heterozygosity <- numeric(length(populations))
expected_heterozygosity <- numeric(length(populations))
for (i in seq_along(populations)) {
pop <- populations[i]
pop_data <- genind_data[pop(genind_data) == pop] #subset genind object by population
if (nInd(pop_data) == 0) {
warning(paste("No data for population:", pop))
next}
tryCatch({
summary_stats <- summary(pop_data) #check if Hobs and Hexp are present before computing means
if (!is.null(summary_stats$Hobs)) {
observed_heterozygosity[i] <- mean(summary_stats$Hobs, na.rm = T)}
if (!is.null(summary_stats$Hexp)) {
expected_heterozygosity[i] <- mean(summary_stats$Hexp, na.rm = T)}
}, error = function(e) {
warning(paste("Error processing population:", pop, "Details:", e$message))})}
heterozygosity_df <- data.frame(
Population = populations,
Observed = round(observed_heterozygosity, 2),
Expected = round(expected_heterozygosity, 2))
print(heterozygosity_df) #print results
return(heterozygosity_df)} #return heterozygosity data frame
plot_heterozygosity <- function(heterozygosity_df) { #create function for plotting
heterozygosity_long <- reshape2::melt(heterozygosity_df, id.vars = "Population",
variable.name = "Type", value.name = "Heterozygosity") #melt data frame for plotting
ggplot(heterozygosity_long, aes(x = Population, y = Heterozygosity, fill = Type)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
labs(x = "Population", y = "Heterozygosity", fill = "Type") +
theme_classic() +
scale_fill_manual(values = c("Observed" = "darkblue", "Expected" = "lightblue"))} #set fill colors
heterozygosity_df <- calculate_heterozygosity(pop_dataset) #calculate heterozygosity
## Population Observed Expected
## 1 H.maia 0.08 0.12
## 2 H.latifascia 0.09 0.09
## 3 H.lucina 0.07 0.08
## 4 H.maia.x.lucina 0.09 0.31
## 5 H.nevadensis 0.02 0.04
## 6 H.sp.GreatLakesComplex 0.08 0.09
## 7 H.slosseri 0.08 0.10
## 8 H.peigleri 0.08 0.09
plot_heterozygosity(heterozygosity_df) #plot heterozygosity
pop_dataset_loci <- pegas::Fst(pegas::as.loci(pop_dataset))
head(round(pop_dataset_loci, 2)) #per-locus F-statistics
## Fit Fst Fis
## Chromosome24_358443 0.23 0.13 0.12
## Chromosome24_358452 -0.01 0.00 -0.01
## Chromosome24_358458 0.32 0.25 0.09
## Chromosome24_358474 0.69 0.23 0.59
## Chromosome24_358511 0.67 0.09 0.64
## Chromosome29_216180 -0.01 -0.02 0.01
round(colMeans(pop_dataset_loci[, !is.na(colMeans(pop_dataset_loci, na.rm = T)) & !is.nan(colMeans(pop_dataset_loci, na.rm = T))], na.rm=T), 2) #global F-statistics (take into account all genotypes and loci but ignore loci with NA)
## Fit Fst Fis
## 0.36 0.13 0.27
round(boot.vc(genind2hierfstat(pop_dataset)[1], genind2hierfstat(pop_dataset)[-1], #calculate CIs of F-statistics (H-Total: total expected heterozygosity, F-pop/Total = Fst, F-Ind/Total = Fit, H-pop: expected heterozygosity within populations, F-Ind/pop: Fis, Hobs: observed heterozygosity) ot = 100)$ci, 2)
nboot = 5000)$ci, 2) #specify number of bootstraps
## H-Total F-pop/Total F-Ind/Total H-pop F-Ind/pop Hobs
## 2.5% 0.11 0.19 0.40 0.09 0.24 0.06
## 50% 0.13 0.23 0.44 0.10 0.27 0.07
## 97.5% 0.15 0.28 0.48 0.11 0.31 0.08
pairwise_gen_dist_matrix <- function(pop_dataset, genetic_diff_measure, viridis_col) {
distance_matrix <- switch(genetic_diff_measure,
"D_Jost" = as.matrix(mmod::pairwise_D(pop_dataset, linearized = F)), #calculate Jost's D (2008)
"Gst_Hedrick" = as.matrix(mmod::pairwise_Gst_Hedrick(pop_dataset, linearized = F)), #calculate Hedrick's G'st (2005)
"Fst" = {fst_results <- StAMPP::stamppFst(StAMPP::stamppConvert(dartR::gi2gl(pop_dataset, verbose = 0), type = "genlight"), nboots = 10000, percent = 95, nclusters = 1) #calculate Fst
fst_values <- as.matrix(fst_results$Fsts) #extract Fst results
p_values <- as.matrix(fst_results$Pvalues) #extract p-values
list(values = fst_values, p_values = p_values)}, #return Fst and p-values
"Ds_Nei" = as.matrix(genet.dist(pop_dataset, method = "Ds")), #compute Nei's standard genetic distance Ds (Nei 1972)
stop("Unsupported genetic differentiation measure. Choose from: 'D_Jost', 'Gst_Hedrick', 'Fst', 'Ds_Nei'."))
if (genetic_diff_measure == "Fst") { #check if using Fst
fst_values <- distance_matrix$values #extract Fst values
p_values <- distance_matrix$p_values #extract p-values
cat("p-values for Fst:\n")
print(p_values) #print p-values
} else {fst_values <- distance_matrix #use regular distance matrix for other measures
p_values <- NULL} #use no p-values for other measures
fst_values[fst_values < 0] <- 0 #replace negative values in distance matrix with 0
label_list <- c("D_Jost" = "Jost’s D", "Gst_Hedrick" = "G’", "Fst" = "F", "Ds_Nei" = "G") #labels for each measure
label <- label_list[[genetic_diff_measure]] #get label
label_lowercase <- ifelse(genetic_diff_measure %in% c("Gst_Hedrick", "Fst", "Ds_Nei"), "ST", "") #set lowercase label
cat(paste(label, "values:\n"))
print(round(fst_values, 2)) #show Fst values rounded to 2 decimal places
distance_matrix_lower <- fst_values #copy Fst values
distance_matrix_lower[!(lower.tri(distance_matrix_lower, diag = F))] <- NA #mask upper triangle
distance_matrix_long <- reshape2::melt(distance_matrix_lower, na.rm = T) #reshape to long format
colnames(distance_matrix_long) <- c("Row", "Column", "Distance") #rename columns
if (!is.null(p_values)) { #prepare p-value data for asterisks
p_values_lower <- p_values #copy p-values
p_values_lower[!(lower.tri(distance_matrix_lower, diag = F))] <- NA #mask upper triangle
p_values_long <- reshape2::melt(p_values_lower, na.rm = T) #reshape p-values to long format
colnames(p_values_long) <- c("Row", "Column", "P_value") #rename columns
distance_matrix_long <- merge(distance_matrix_long, p_values_long, by = c("Row", "Column")) #merge distance and p-values
distance_matrix_long$Significance <- ifelse(p_values_long$P_value <= (0.05 / length(unique(pop_dataset$pop)) * (length(unique(pop_dataset$pop)) - 1) / 2), "*", "")} #add asterisks for significant comparisons after Bonferroni correction
if (genetic_diff_measure == "Fst") { #check if using Fst
plot <- ggplot2::ggplot(distance_matrix_long, aes(x = Column, y = Row, fill = Distance)) + #create heatmap plot
geom_tile() +
geom_text(aes(label = Significance), size = 7, color = "#FFA500", na.rm = T) +
scale_fill_viridis_c(option = viridis_col, direction = -1) + #apply viridis color scale
theme_classic() +
labs(x = "Population", y = "Population", fill = bquote(.(label)[.(label_lowercase)]))} #label axes and legend
else {plot <- ggplot2::ggplot(distance_matrix_long, aes(x = Column, y = Row, fill = Distance)) + #create heatmap plot
geom_tile() +
scale_fill_viridis_c(option = viridis_col, direction = -1) + #apply viridis color scale
theme_classic() +
labs(x = "Population", y = "Population", fill = bquote(.(label)[.(label_lowercase)]))} #label axes and legend}
print(plot)} #print plot
pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Fst", viridis_col = "mako") #compute and plot Fst (Weir and Cockerham 1984)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Registered S3 method overwritten by 'genetics':
## method from
## [.haplotype pegas
## Starting gl.compliance.check
## Processing genlight object with SNP data
## The slot loc.all, which stores allele name for each locus, is empty.
## Creating a dummy variable (A/C) to insert in this slot.
## Checking coding of SNPs
## SNP data scored NA, 0, 1 or 2 confirmed
## Checking locus metrics and flags
## Recalculating locus metrics
## Checking for monomorphic loci
## No monomorphic loci detected
## Checking for loci with all missing data
## No loci with all missing data detected
## Checking whether individual names are unique.
## Checking for individual metrics
## Warning: Creating a slot for individual metrics
## Checking for population assignments
## Population assignments confirmed
## Spelling of coordinates checked and changed if necessary to
## lat/lon
## Completed: gl.compliance.check
## p-values for Fst:
## H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia NA NA NA NA
## H.latifascia 0.0000 NA NA NA
## H.lucina 0.0000 0.0000 NA NA
## H.maia.x.lucina 0.9252 0.7018 0.3052 NA
## H.nevadensis 0.0000 0.0000 0.0000 0.0000
## H.sp.GreatLakesComplex 0.0000 0.0000 0.0000 0.5395
## H.slosseri 0.0000 0.0000 0.0000 0.6456
## H.peigleri 0.0000 0.0000 0.0000 0.8438
## H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia NA NA NA
## H.latifascia NA NA NA
## H.lucina NA NA NA
## H.maia.x.lucina NA NA NA
## H.nevadensis NA NA NA
## H.sp.GreatLakesComplex 0 NA NA
## H.slosseri 0 0 NA
## H.peigleri 0 0 0
## H.peigleri
## H.maia NA
## H.latifascia NA
## H.lucina NA
## H.maia.x.lucina NA
## H.nevadensis NA
## H.sp.GreatLakesComplex NA
## H.slosseri NA
## H.peigleri NA
## F values:
## H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia NA NA NA NA
## H.latifascia 0.07 NA NA NA
## H.lucina 0.14 0.20 NA NA
## H.maia.x.lucina 0.00 0.00 0.03 NA
## H.nevadensis 0.42 0.52 0.63 0.69
## H.sp.GreatLakesComplex 0.07 0.10 0.23 0.00
## H.slosseri 0.11 0.10 0.22 0.00
## H.peigleri 0.08 0.13 0.21 0.00
## H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia NA NA NA
## H.latifascia NA NA NA
## H.lucina NA NA NA
## H.maia.x.lucina NA NA NA
## H.nevadensis NA NA NA
## H.sp.GreatLakesComplex 0.60 NA NA
## H.slosseri 0.55 0.12 NA
## H.peigleri 0.56 0.14 0.07
## H.peigleri
## H.maia NA
## H.latifascia NA
## H.lucina NA
## H.maia.x.lucina NA
## H.nevadensis NA
## H.sp.GreatLakesComplex NA
## H.slosseri NA
## H.peigleri NA
pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "D_Jost", viridis_col = "mako") #compute and plot Jost´s D (Host 2008)
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Jost’s D values:
## H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia 0.00 0.01 0.02 0.00
## H.latifascia 0.01 0.00 0.03 0.00
## H.lucina 0.02 0.03 0.00 0.01
## H.maia.x.lucina 0.00 0.00 0.01 0.00
## H.nevadensis 0.08 0.07 0.10 0.09
## H.sp.GreatLakesComplex 0.01 0.01 0.03 0.00
## H.slosseri 0.02 0.01 0.03 0.01
## H.peigleri 0.01 0.02 0.03 0.00
## H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia 0.08 0.01 0.02
## H.latifascia 0.07 0.01 0.01
## H.lucina 0.10 0.03 0.03
## H.maia.x.lucina 0.09 0.00 0.01
## H.nevadensis 0.00 0.09 0.08
## H.sp.GreatLakesComplex 0.09 0.00 0.02
## H.slosseri 0.08 0.02 0.00
## H.peigleri 0.08 0.02 0.01
## H.peigleri
## H.maia 0.01
## H.latifascia 0.02
## H.lucina 0.03
## H.maia.x.lucina 0.00
## H.nevadensis 0.08
## H.sp.GreatLakesComplex 0.02
## H.slosseri 0.01
## H.peigleri 0.00
pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Gst_Hedrick", viridis_col = "mako") #compute and plot Hedrick's G'st (Hedrick 2005)
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## Warning in HsHt(g): Need at least two population to calculate differentiation
## G’ values:
## H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia 0.00 0.10 0.18 0.02
## H.latifascia 0.10 0.00 0.23 0.01
## H.lucina 0.18 0.23 0.00 0.10
## H.maia.x.lucina 0.02 0.01 0.10 0.00
## H.nevadensis 0.51 0.51 0.62 0.63
## H.sp.GreatLakesComplex 0.10 0.12 0.25 0.04
## H.slosseri 0.13 0.12 0.24 0.07
## H.peigleri 0.11 0.14 0.23 0.01
## H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia 0.51 0.10 0.13
## H.latifascia 0.51 0.12 0.12
## H.lucina 0.62 0.25 0.24
## H.maia.x.lucina 0.63 0.04 0.07
## H.nevadensis 0.00 0.59 0.56
## H.sp.GreatLakesComplex 0.59 0.00 0.14
## H.slosseri 0.56 0.14 0.00
## H.peigleri 0.55 0.17 0.08
## H.peigleri
## H.maia 0.11
## H.latifascia 0.14
## H.lucina 0.23
## H.maia.x.lucina 0.01
## H.nevadensis 0.55
## H.sp.GreatLakesComplex 0.17
## H.slosseri 0.08
## H.peigleri 0.00
pairwise_gen_dist_matrix(pop_dataset, genetic_diff_measure = "Ds_Nei", viridis_col = "mako") #compute and plot Nei's standard genetic distance Ds (Nei 1972)
## G values:
## H.maia H.latifascia H.lucina H.maia.x.lucina
## H.maia 0.00 0.01 0.03 0.03
## H.latifascia 0.01 0.00 0.03 0.04
## H.lucina 0.03 0.03 0.00 0.04
## H.maia.x.lucina 0.03 0.04 0.04 0.00
## H.nevadensis 0.08 0.07 0.10 0.10
## H.sp.GreatLakesComplex 0.02 0.02 0.04 0.04
## H.slosseri 0.02 0.02 0.04 0.05
## H.peigleri 0.02 0.02 0.03 0.03
## H.nevadensis H.sp.GreatLakesComplex H.slosseri
## H.maia 0.08 0.02 0.02
## H.latifascia 0.07 0.02 0.02
## H.lucina 0.10 0.04 0.04
## H.maia.x.lucina 0.10 0.04 0.05
## H.nevadensis 0.00 0.09 0.09
## H.sp.GreatLakesComplex 0.09 0.00 0.02
## H.slosseri 0.09 0.02 0.00
## H.peigleri 0.08 0.03 0.01
## H.peigleri
## H.maia 0.02
## H.latifascia 0.02
## H.lucina 0.03
## H.maia.x.lucina 0.03
## H.nevadensis 0.08
## H.sp.GreatLakesComplex 0.03
## H.slosseri 0.01
## H.peigleri 0.00
Inbreeding represents the excess of homozygosity in an individual due to inheriting two identical alleles from recent common ancestor. It can lead to inbreeding depression so the associated loss of fitness, often due to recessive deleterious alleles that are more frequent than in the population.
pop_dataset_inbreeding_mean <- sapply(inbreeding(pop_dataset, N = 1000, M = 2000), mean) #compute mean inbreeding values for each individual (mean from the likelihood distribution of each individual)
hist(pop_dataset_inbreeding_mean, main = "Average inbreeding in individuals") #plot distribution of (mean) inbreeding values for each individual
round(tail(sort(pop_dataset_inbreeding_mean), n = 10), 2) #list ten individuals with highest mean inbreeding
## H.m.sandra_NJ_Hmg113 H.nevadensis_NM_DN2530
## 0.69 0.71
## H.nevadensis_NV_DR99 H.m.menyanthevora_NY_DR90
## 0.71 0.72
## H.nevadensis_CA_X100b H.nevadensis_CA_DN2444
## 0.73 0.73
## H.m.menyanthevora_NY_DN2504 H.nevadensis_CA_DR34
## 0.73 0.74
## H.nevadensis_CA_DN2395 H.m.menyanthevora_NY_DN2469
## 0.75 0.75
pop_dataset_inbreeding_df <- data.frame(ID = names(pop_dataset_inbreeding_mean), Inbreeding_Mean = pop_dataset_inbreeding_mean, Population = pop_dataset$pop)
ggplot(pop_dataset_inbreeding_df, aes(x = Inbreeding_Mean)) + #plot distribution of (mean) inbreeding values for each individual for each population
geom_histogram(binwidth = 0.04, fill = "lightblue", color = "black", alpha = 0.8) +
facet_wrap(~ Population, scales = "free_y") +
labs(x = "Individual mean inbreeding value", y = "Frequency") +
theme_classic() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
pop_dataset_inbreeding_df %>% group_by(Population) %>% top_n(5, Inbreeding_Mean) %>% arrange(Population, desc(Inbreeding_Mean)) %>% mutate(Inbreeding_Mean = round(Inbreeding_Mean, 2)) #list ten individuals with highest mean inbreeding
## # A tibble: 36 Ă— 3
## # Groups: Population [8]
## ID Inbreeding_Mean Population
## <chr> <dbl> <fct>
## 1 H.m.menyanthevora_NY_DN2469 0.75 H.maia
## 2 H.m.menyanthevora_NY_DN2504 0.73 H.maia
## 3 H.m.menyanthevora_NY_DR90 0.72 H.maia
## 4 H.m.sandra_NJ_Hmg113 0.69 H.maia
## 5 H.m.menyanthevora_NY_DN2502 0.66 H.maia
## 6 H.latifascia_IA_DN2480 0.51 H.latifascia
## 7 H.latifascia_NE_Hmg041 0.51 H.latifascia
## 8 H.latifascia_IA_DN2518 0.5 H.latifascia
## 9 H.latifascia_NE_DN2523 0.5 H.latifascia
## 10 H.latifascia_IA_DN2479 0.5 H.latifascia
## # ℹ 26 more rows
inbreeding_individuals <- adegenet::inbreeding(pop_dataset, #compute individual inbreeding coefficients
res.type = "sample", #obtain sample of F values
N = 1000, #sample size
M = 5000) #number of different F values
inbreeding_df <- data.frame(ID = names(inbreeding_individuals), Inbreeding = unlist(inbreeding_individuals), Population = as.character(pop_dataset$pop)) #convert to dataframe
ggplot(inbreeding_df, aes(x = Inbreeding, color = Population)) + #plot density of inbreeding coefficients
geom_density(linewidth = 1.4) + #plot density with line width
scale_color_viridis_d(option = "magma", direction = -1) + #reverse magma palette
labs(x = "Inbreeding Coefficient", y = "Density") + #add axis labels
theme_classic() +
theme(legend.position = "right") #legend on the right
ggplot(inbreeding_df, aes(x = Inbreeding, fill = Population)) + #plot histogram of inbreeding coefficients
geom_histogram(bins = 30, alpha = 0.6, position = "identity") + #histogram with transparency
scale_fill_viridis_d(option = "magma", direction = -1) + #reverse magma palette
labs(x = "Inbreeding Coefficient", y = "Count") + #axis labels
theme_classic() +
theme(legend.position = "right")
inbr_ind_point_est <- adegenet::inbreeding(pop_dataset, #compute individual inbreeding coefficients
res.type = "estimate", #obtain point estimate of F values
N = 100, #sample size
M = 500) #number of different F values
inbreeding_merged <- merge(data.frame(ID = names(inbr_ind_point_est), Inbreeding = unlist(inbr_ind_point_est)), data.frame(ID = indNames(pop_dataset), Population = as.character(pop_dataset$pop)), by = "ID") #merge inbreeding coefficients with population info
population_inbreeding <- inbreeding_merged %>%group_by(Population) %>% #calculate population-level inbreeding statistics
summarize(Average_Inbreeding = mean(Inbreeding, na.rm = T), SD_Inbreeding = sd(Inbreeding, na.rm = T))
print(population_inbreeding %>% mutate(Average_Inbreeding = round(Average_Inbreeding, 2), SD_Inbreeding = round(SD_Inbreeding, 2))) #print population inbreeding statistics for populations (mean and SD)
## # A tibble: 8 Ă— 3
## Population Average_Inbreeding SD_Inbreeding
## <chr> <dbl> <dbl>
## 1 H.latifascia 0.08 0.1
## 2 H.lucina 0.2 0.2
## 3 H.maia 0.33 0.24
## 4 H.maia.x.lucina 0 NA
## 5 H.nevadensis 0.61 0.29
## 6 H.peigleri 0.14 0.15
## 7 H.slosseri 0.2 0.17
## 8 H.sp.GreatLakesComplex 0.16 0.15
ggplot(population_inbreeding, aes(x = reorder(Population, Average_Inbreeding), y = Average_Inbreeding)) + #bar plot of population inbreeding coefficients with SD error bars
geom_bar(stat = "identity", aes(fill = Population)) + #bar plot with fill color
geom_errorbar(aes(ymin = Average_Inbreeding - SD_Inbreeding, ymax = Average_Inbreeding + SD_Inbreeding), width = 0.2) + #add error bars
scale_fill_manual(values = viridis::magma(n = length(unique(population_inbreeding$Population)))) +
labs(x = NULL, y = "Average inbreeding coefficient (with SD)") + #axis labels
theme_classic() +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) #remove x-axis labels and ticks
We will use the corrected index of association rd (Agapow & Burt 2001) to estimate linkage between loci for each population. This is a less biased version of the original index of association by Brown et al. (1980) and accounts for the number of sampled loci. It uses a permutation test with the null hypothesis of unlinked loci.
results_list <- list() #initialize an empty list to store results
messages <- c() #initialize an empty list to store messages
get_num_samples <- function(pop) {length(indNames(popsub(pop_dataset, pop)))} #create function to get number of samples for each population
for (pop in unique(pop_dataset$pop)) { #loop through each population to compute index of association
pop_data <- popsub(pop_dataset, pop) #subset data for current population
ia_result <- ia(pop_data, sample = 100) #compute IA with 1000 permutations
p_value <- ia_result["p.Ia"] #extract p-value
ia_value <- ia_result["Ia"] #extract IA values
if (!is.na(p_value) && !is.na(ia_value)) {results_list[[pop]] <- c(IA = ia_value, p_value = p_value) #store IA and p-value if available
} else {results_list[[pop]] <- c(IA = NA, p_value = NA) #store NA if no p-value is available
messages <- c(messages, paste("For population", pop, "no p-value could be calculated"))}}
results_df <- do.call(rbind, lapply(names(results_list), function(pop) { #create data frame from results list
data.frame(Population = pop, IA = results_list[[pop]][1], p_value = results_list[[pop]][2], n = get_num_samples(pop), stringsAsFactors = F)}))
for (message in messages) {print(message)} #print message(s)
## [1] "For population H.maia.x.lucina no p-value could be calculated"
print(results_df) #print test results
## Population IA p_value n
## IA.Ia H.maia 3.981871 0.00990099 75
## IA.Ia1 H.latifascia 3.076350 0.06930693 15
## IA.Ia2 H.lucina 2.447624 0.49504950 10
## IA H.maia.x.lucina NA NA 1
## IA.Ia3 H.nevadensis 2.306872 0.00990099 22
## IA.Ia4 H.sp.GreatLakesComplex 3.227128 0.05940594 10
## IA.Ia5 H.slosseri 2.900211 0.00990099 19
## IA.Ia6 H.peigleri 3.346096 0.00990099 12
genetic_dist <- dist(sqrt(adegenet::tab(pop_dataset, freq = T)), method = "euclidean")
geo_dist <- geosphere::distm(as.matrix(dataset[, c("Longitude", "Latitude")]), fun = geosphere::distVincentySphere)
mantel_test <- ade4::mantel.randtest(as.dist(genetic_dist), as.dist(geo_dist), nrepet = 1000)
mantel_test #print Mantel test results
## Monte-Carlo test
## Call: ade4::mantel.randtest(m1 = as.dist(genetic_dist), m2 = as.dist(geo_dist),
## nrepet = 1000)
##
## Observation: 0.4374899
##
## Based on 1000 replicates
## Simulated p-value: 0.000999001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 21.4443437587 -0.0002689246 0.0004167200
plot(mantel_test) #plot results: Black dot indicates observed correlation and histograms show permuted values
combined_data <- data.frame(genetic_dist = reshape2::melt(as.matrix(genetic_dist))$value, #combine genetic and geographic distance matrices into one data frame
geo_dist = reshape2::melt(as.matrix(geo_dist))$value)
combined_data <- combined_data[combined_data$genetic_dist != 0 & combined_data$geo_dist != 0, ] #remove self-comparisons where distance is 0
svg("IBD across all individuals.svg", width = 25/2.56, height = 15/2.56)
ggplot(combined_data, aes(x = geo_dist / 1000, y = genetic_dist)) + #plot IBD
geom_point(alpha = 0.2, color = "black", size = 1.5) + #scatter plot with points
geom_smooth(method = "loess", color = "darkgrey", linewidth = 0.9) + #LOESS smooth fit
geom_smooth(method = "lm", color = "orange", linewidth = 1.5) + #linear model fit
labs(x = "Geographic Distance (km)", y = "Genetic Distance (Edward's)", title = "Isolation-by-distance") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5), #center plot title
axis.text = element_text(color = "black")) #color axis text
population_subsets <- list() #define list to store subsets for each population
populations <- unique(pop_dataset@pop) #extract unique populations from genind object
for (pop in populations) { #create subsets for each population
pop_genind_subset <- pop_dataset[pop_dataset@pop == pop, ] #subset genind for population
pop_coords_subset <- dataset[dataset$Species == pop, ] #subset coordinates for population
population_subsets[[pop]] <- list(genind_obj = pop_genind_subset, coords_df = pop_coords_subset)} #store subsets in list
analyze_population <- function(genind_obj, coords_df) {
if (nrow(coords_df) <= 3) { #check if population has more than 3 individuals
cat("Skipping population", unique(genind_obj@pop), "due to fewer than 3 individuals.\n")
return(NULL)} #skip populations with 3 or fewer individuals
genetic_dist <- tryCatch({ #calculate genetic distance (Edwards' distance)
dist(sqrt(adegenet::tab(genind_obj, freq = T)), method = "euclidean")
}, error = function(e) {cat("Error in genetic distance calculation:", e$message, "\n")
return(NULL)})
geo_dist <- tryCatch({ #calculate geographic distance (Vincenty distance)
geosphere::distm(as.matrix(coords_df[, c("Longitude", "Latitude")]), fun = geosphere::distVincentySphere)
}, error = function(e) {cat("Error in geographic distance calculation:", e$message, "\n")
return(NULL)})
mantel_test <- NULL #perform Mantel test if both distance matrices are available
if (!is.null(genetic_dist) && !is.null(geo_dist)) {
mantel_test <- tryCatch({
ade4::mantel.randtest(as.dist(genetic_dist), as.dist(geo_dist), nrepet = 1000)
}, error = function(e) {cat("Error in Mantel test:", e$message, "\n")
return(NULL)})
} else {cat("Skipping Mantel test due to NULL distance matrices.\n")}
plot_data <- NULL #prepare plot data and generate plot if distance matrices are available
combined_data <- NULL #initialize combined_data to store genetic and geographic distances
if (!is.null(genetic_dist) && !is.null(geo_dist)) {
genetic_dist_matrix <- as.matrix(genetic_dist)
geo_dist_matrix <- as.matrix(geo_dist)
combined_data <- data.frame( #combine matrices for plotting
genetic_dist = reshape2::melt(genetic_dist_matrix)$value,
geo_dist = reshape2::melt(geo_dist_matrix)$value,
Population = unique(genind_obj@pop)) #add population information
combined_data <- combined_data[combined_data$genetic_dist != 0 & combined_data$geo_dist != 0, ] #remove self-comparisons
mantel_p_value <- ifelse(is.null(mantel_test), NA, mantel_test$pvalue) #get Mantel p-value
plot_title <- paste("Isolation-by-distance for", unique(genind_obj@pop), "\nMantel test p-value:", format(mantel_p_value, digits = 3)) #title with p-value
plot_data <- ggplot(combined_data, aes(x = geo_dist / 1000, y = genetic_dist)) + #plot genetic vs geographic distance
geom_point(alpha = 0.5, color = "black", size = 1.8) + #scatter plot with points
geom_smooth(method = "loess", color = "darkgrey", linewidth = 0.9) + #LOESS smooth fit
geom_smooth(method = "lm", color = "orange", linewidth = 1.5) + #linear model fit
labs(x = "Geographic distance (km)", y = "Genetic distance (Edwards')", title = plot_title) + #include plot title with p-value
theme_classic() +
theme(plot.title = element_text(hjust = 0.5), #center plot title
axis.text = element_text(color = "black")) #color axis text
} else {cat("Skipping plot due to missing distance matrices.\n")}
return(list(mantel_test = mantel_test, plot_data = plot_data, combined_data = combined_data, num_individuals = nrow(coords_df)))} #return combined_data
results_list <- list() #initialize results list to store Mantel test results and plots
results_df <- data.frame(Population = character(), Num_Individuals = integer(), #initialize dataframe to store results
Mantel_p_value = numeric(), stringsAsFactors = F)
all_data <- data.frame() #initialize dataframe to store combined data for all populations
for (pop in names(population_subsets)) {
cat("Analyzing population:", pop, "\n")
pop_data <- population_subsets[[pop]]$genind_obj #extract genind object for population
pop_coords <- population_subsets[[pop]]$coords_df #extract coordinates dataframe for population
results <- analyze_population(pop_data, pop_coords) #analyze population
if (!is.null(results)) { #store results if available
results_list[[pop]] <- results
results_df <- rbind(results_df, data.frame( #store results in dataframe
Population = pop, Num_Individuals = results$num_individuals,
Mantel_p_value = ifelse(is.null(results$mantel_test), NA, results$mantel_test$obs), #check if mantel_test is NULL
stringsAsFactors = F))
if (!is.null(results$combined_data)) {
all_data <- rbind(all_data, results$combined_data)} #append combined data for each population
if (!is.null(results$plot_data)) { #save plot if available
ggsave(paste0("Isolation_by_Distance_", pop, ".png"), plot = results$plot_data,
width = 8, height = 6, dpi = 300)}}}
## Analyzing population: H.maia
## Analyzing population: H.latifascia
## Analyzing population: H.lucina
## Analyzing population: H.maia.x.lucina
## Skipping population 1 due to fewer than 3 individuals.
## Analyzing population: H.nevadensis
## Analyzing population: H.sp.GreatLakesComplex
## Analyzing population: H.slosseri
## Analyzing population: H.peigleri
write.csv(results_df, "Mantel_test_results.csv", row.names = F)
svg("IBD across populations.svg", width = 25/2.56, height = 15/2.56)
ggplot(all_data, aes(x = geo_dist / 1000, y = genetic_dist, color = Population)) +
geom_point(alpha = 0.4, size = 2) +
geom_smooth(method = "loess", se = F, aes(color = Population), linewidth = 0.6) +
geom_smooth(method = "lm", se = F, aes(color = Population), linewidth = 1.6) +
scale_color_manual(values = viridis::magma(n = length(unique(all_data$Population)))) +
labs(x = "Geographic distance (km)", y = "Genetic Distance (Edwards')",
title = "Isolation-by-distance across populations") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5), #center plot title
axis.text = element_text(color = "black")) #color axis text
dev.off()
Genetic clusters are inferred through eigenvector decomposition of the allele frequencies (matrices) among individuals, aiming to reduce the complexity of genetic data by projecting it into a lower-dimensional space with each axis (principal component) capturing a portion of the variance in the allele frequencies (Patterson et al. 2006, Reich et al. 2008).
cols_pop <- viridis::magma(n = length(levels(population_assignment)), begin = 0, end = 1) #define color palette
shapes_pop <- rep(c(21, 24, 22, 25), length.out = length(cols_pop)) #define shape palette
plot_width <- 25/2.56
plot_height <- 15/2.56
point_size <- 3.5
point_alpha <- 0.9
density_plot_alpha <- 0.7
density_plot_height <- 5/2.56
point_outline_color <- "black"
point_size_3D_plot <- 3
point_alpha_3D_plot <- 1
point_size_3D_plot <- 7
population_assignment_name <- "Species"
pop_data_datasets <- list(pop_dataset = list(data = pop_dataset))
handle_missing_infinite_values <- function(data) {
data$tab <- as.matrix(data$tab)
data$tab <- data$tab[, colSums(is.na(data$tab) | is.infinite(data$tab)) < nrow(data$tab)]
data$tab <- apply(data$tab, 2, function(column) {
column_mean <- mean(column, na.rm = T)
column[is.na(column) | is.infinite(column)] <- column_mean
return(column)})
return(data)}
perform_pca_and_check <- function(data) {
data <- handle_missing_infinite_values(data)
prcomp(data$tab)}
calculate_variance_explained <- function(pca_result) {
(pca_result$sdev^2 / sum(pca_result$sdev^2)) * 100}
calculate_num_pcs_above_threshold <- function(var_expl_perc, threshold = 1) {
length(which(var_expl_perc >= threshold))}
create_scree_plot_df <- function(var_expl_perc) {
data.frame(PC = seq_along(var_expl_perc), VarianceExplained = var_expl_perc)}
create_pca_df <- function(pca_result, group) {
num_pcs <- ncol(pca_result$x) # total number of PCs
num_pcs_to_keep <- ceiling(num_pcs * 0.5) #keep 50% of total PCs
data.frame(pca_result$x[, 1:num_pcs_to_keep], group = group)}
plot_scree <- function(scree_plot_df, threshold) {
num_pcs <- nrow(scree_plot_df) #total number of PCs
num_pcs_to_keep <- ceiling(num_pcs * 0.5) #keep 50% of total PCs
ggplot(scree_plot_df[1:num_pcs_to_keep, ], aes(x = PC, y = VarianceExplained)) +
geom_bar(stat = "identity", fill = "grey") +
geom_line(aes(y = cumsum(VarianceExplained)), color = "orange") +
geom_point(aes(y = cumsum(VarianceExplained)), color = "orange", size = 2) +
labs(x = "PC", y = "Explained variance (%)") +
geom_hline(yintercept = threshold, color = "black", linetype = "dashed") +
theme_classic()}
run_pca_for_datasets <- function(datasets) {
lapply(datasets, function(subset) {
pca_result <- perform_pca_and_check(subset$data)
var_expl_perc <- calculate_variance_explained(pca_result)
num_pcs <- calculate_num_pcs_above_threshold(var_expl_perc)
scree_plot_df <- create_scree_plot_df(var_expl_perc)
print(plot_scree(scree_plot_df, threshold = 1))
list(pca_result = pca_result, variance_explained = var_expl_perc, num_pcs_above_1perc = num_pcs)})}
pca_results <- run_pca_for_datasets(pop_data_datasets)
pca_df_pop_dataset <- create_pca_df(pca_results$pop_dataset$pca_result,
factor(pop_dataset$pop, levels = unique(population_assignment)))
pca_df_pop_dataset_filtered_n2 <- pca_df_pop_dataset %>% dplyr::group_by(group) %>% dplyr::filter(n() > 1) #filter out groups for density plots with fewer than 2 data points
svg("PCA_PC1_PC2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(pca_df_pop_dataset, aes(x = PC1, y = PC2)) + #PCA scatterplot of PC1 and PC2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group),
color = point_outline_color) +
scale_shape_manual(values = shapes_pop) +
scale_fill_manual(values = cols_pop) +
labs(x = sprintf("PC1 (%.2f%%)", pca_results$pop_dataset$variance_explained[1]),
y = sprintf("PC2 (%.2f%%)", pca_results$pop_dataset$variance_explained[2]),
shape = population_assignment_name,
fill = population_assignment_name) +
theme_classic() +
guides(override.aes = list(alpha = 1))
dev.off()
svg("PCA_PC1_PC3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(pca_df_pop_dataset, aes(x = PC1, y = PC3)) + #PCA scatterplot of PC1 and PC3
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group),
color = point_outline_color) +
scale_shape_manual(values = shapes_pop) +
scale_fill_manual(values = cols_pop) +
labs(x = sprintf("PC1 (%.2f%%)", pca_results$pop_dataset$variance_explained[1]),
y = sprintf("PC3 (%.2f%%)", pca_results$pop_dataset$variance_explained[2]),
shape = population_assignment_name,
fill = population_assignment_name) +
theme_classic() +
guides(override.aes = list(alpha = 1))
dev.off()
svg("PCA_density_PC1_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC1
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC1, fill = group, color = group)) +
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
scale_color_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("PCA_density_PC2_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC2
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC2, fill = group, color = group)) +
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
scale_color_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("PCA_density_PC3_pop_dataset.svg", width = plot_width, height = density_plot_height) #density plot for PC3
ggplot(pca_df_pop_dataset_filtered_n2, aes(x = PC3, fill = group, color = group)) +
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
scale_color_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
plotly::plot_ly( #3D plot of PCA using PC1-PC3
data = pca_df_pop_dataset,
x = ~PC1,
y = ~PC2,
z = ~PC3,
color = ~group, colors = cols_pop,
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, #marker transparency
marker = list(size = point_size_3D_plot)) #marker size
Overview
Discriminant analysis of principal components (DAPC) is a multivariate
method designed to identify and describe genetic clusters by determining
the optimal number of clusters (k) that best summarizes the data. It is
faster than traditional Bayesian clustering methods and does not assume
Hardy-Weinberg Equilibrium (HWE) or linkage between loci. It maximizes
variation between groups while minimizing variation within them,
offering a distinct advantage over methods like PCA, PCoA, and MANOVA
(which focus on global diversity but neglect group-specific
differences). Furthermore, it is potentially more powerful and accurate
in phylogenetic and hierarchical analyses (including hybridization) than
methods that minimize HWE and linkage disequilibrium (e.g., STRUCTURE)
(Kanno et al 2011, Dupuis and Sperling 2015, Jombart et al. 2010).
Approach
DAPC first transforms the data through principal component analysis
(PCA) to ensure the variables submitted to discriminant analysis (DA)
are uncorrelated and that their number is less than the number of
analyzed individuals (in contrast to a standard DA), and then then
applies DA to the retained principal components, aiming to discriminate
between predefined groups. The eigenvalues in a DAPC represent the ratio
of variance between groups over the variance within groups for each
discriminant function, indicating the discriminant power of each axis in
a DAPC plot.
How many PCs to retain?
Choosing the optimal number of retained principal components (PCs) is
crucial for the effectiveness of DAPC. Retaining too many components can
lead to overfitting and instability in membership probabilities, while
retaining too few may result in a loss of valuable information. This can
be done using the a-score (representing the difference between the
proportion of successful reassignments observed in the analysis and
values obtained using random groupings) or cross-validation. The latter
optimization procedure is most suitable and finds the optimal number of
retained PCs by splitting the data into two sets: a training (usually
90% of the data) and a validation set (typically the remaining 10%). The
validation set is selected using stratified random sampling, ensuring
that every group or population from the original dataset is represented
in both sets. DAPC is then performed on the training set while retaining
different numbers of PCs. The method then assesses how well the analysis
can predict group membership for the individuals in the validation set.
This helps to identify the optimal number of PCs to retain. The process
is repeated multiple times to improve accuracy. The final number of PCs
used in the analysis should be documented for transparency, along with
evidence supporting the choice of k (Miller et al. 2020).
A priori vs de novo clustering
DAPC can be performed either using a priori defined groupings (e.g.,
populations/species) (similar to a PCA) or it can identify genetic
clusters (k) de novo (e.g., similar to Structure). This decision can
affect the DAPC results and is therefore important to consider (Miller
et al. 2020). Misspecification of groupings can lead to artificially
large populations with Wahlund-like effects of apparent depressed
heterozygosity (Wahlund 1928), with these inflated population size
estimates preventing conservation and increasing risk of extinction for
one of “cryptic” genetic clusters, or it can lead to over splitting
populations that should be combined. De novo group assignments can be
inaccurate under conditions of high migration rates or low genetic
differentiation but is mostly reliable when migration rates are low
(Miller et al. 2020). For a priori assessments, the genetic distance
between clusters reflects underlying FST values (Miller et al. 2020).
Similar to other methods, artefactual discrete groups may be identified
in populations with continuously distributed (cline-like) genetic
diversity and under spatially heterogeneous sampling of populations.
However, incorporating scatterplots for a graphical assessment of the
genetic structures between clusters can allow to assess the organization
of genetic variability and reveal potential clines.
k-means clustering
When grouping information is unknown, DAPC can incorporate k-means
clustering to identify the optimal number of clusters k (Lee et
al. 2009, Liu & Zhao 2006). The k-means algorithm maximizes
variation between groups, and the optimal k is determined by comparing
clustering solutions using the Bayesian Information Criterion (BIC),
selecting the lowest BIC value.
Additional features
DAPC also provides membership probabilities for each individual, which
are based on the retained discriminant functions. These probabilities
differ from admixture coefficients in STRUCTURE but can similarly be
interpreted as reflecting the proximity of individuals to different
genetic clusters. They also offer insights into the distinctiveness of
identified clusters and potential admixture events. This feature makes
DAPC a potentially more powerful and accurate method for analyzing
phylogenetic relationships, hierarchical structures, and hybridization
events compared to methods like STRUCTURE, which minimize assumptions
about HWE and linkage disequilibrium (Kanno et al. 2011, Dupuis and
Sperling 2015, Jombart et al. 2010). Additionally, DAPC allows users to
identify and plot alleles that contribute most significantly to the
discrimination of clusters, providing further insight into genetic
differentiation.
Use a larger number here (e.g., 10000)
crossval_N <- 10 #number of cross-validation runs
pop_dataset@tab <- matrix(
as.integer(round(apply(pop_dataset@tab, 2, function(x) {
x[is.na(x)] <- mean(x, na.rm = T) #replace NA with column means
return(x)}))), #return modified column
nrow = nrow(pop_dataset@tab), #number of rows
ncol = ncol(pop_dataset@tab), #number of columns
dimnames = dimnames(pop_dataset@tab)) #preserve original dimensions
original_populations <- unique(pop_dataset@pop) #identify populations before filtering
pop_dataset_DAPC <- pop_dataset[adegenet::pop(pop_dataset) %in% names(table(adegenet::pop(pop_dataset))[table(adegenet::pop(pop_dataset)) > 1]), ] #filter populations with fewer than 2 members
remaining_populations <- unique(pop_dataset_DAPC@pop) #identify remaining populations
removed_populations <- setdiff(original_populations, remaining_populations) #identify removed populations
if (length(removed_populations) > 0) { #check if any populations were removed
cols_pop_DAPC <- cols_pop[-(which(original_populations %in% removed_populations))] #remove colors for removed populations
shapes_pop_DAPC <- shapes_pop[-which(original_populations %in% removed_populations)] #remove shapes for removed populations
} else { #no populations were removed so keep original color and shape assignments
cols_pop_DAPC <- cols_pop
shapes_pop_DAPC <- shapes_pop}
xval_results_pop_dataset_DAPC_run1 <- adegenet::xvalDapc( #run1 of cross-validation
pop_dataset_DAPC@tab, #genetic data matrix (allele frequencies)
adegenet::pop(pop_dataset_DAPC), #population assignment (grouping factor) of individuals
training.set = 0.9, #use 90% of data for training during cross-validation
n.pca.max = ceiling(min(nrow(pop_dataset_DAPC@tab), ncol(pop_dataset_DAPC@tab)) * 0.6), #set maximum number of PCs to evaluate in cross-validation to 60% of total numbers of PCs
result = "groupMean", #use group means to calculate the cross-validation result
center = T, #center data before performing PCA
scale = F, #do not scale data (i.e., do not standardize allele frequencies)
n.rep = crossval_N, #set number of cross-validation replicates
xval.plot = T) #plot cross-validation results (mean squared error (MSE) vs. number of PCs)
optimal_pcs_pop_dataset_DAPC_run1 <- xval_results_pop_dataset_DAPC_run1[["Number of PCs Achieving Lowest MSE"]] #extract optimal number of PCs corresponding to lowest MSE from cross-validation results
print(optimal_pcs_pop_dataset_DAPC_run1) #print optimal number of PCs for dataset
## [1] "40"
xval_results_pop_dataset_DAPC_run2 <- adegenet::xvalDapc( #run2 of cross-validation
pop_dataset_DAPC@tab, adegenet::pop(pop_dataset_DAPC),
training.set = 0.9, result = "groupMean", center = T, scale = F, n.rep = crossval_N, xval.plot = T,
n.pca = seq(as.numeric(optimal_pcs_pop_dataset_DAPC_run1) - 15, as.numeric(optimal_pcs_pop_dataset_DAPC_run1) + 15, by = 1)) #refine PCs range
optimal_pcs_pop_dataset_DAPC_run2 <- xval_results_pop_dataset_DAPC_run2[["Number of PCs Achieving Lowest MSE"]] #extract refined optimal PCs
print(xval_results_pop_dataset_DAPC_run2[2:6]) #print cross-validation results
## $`Median and Confidence Interval for Random Chance`
## 2.5% 50% 97.5%
## 0.0977508 0.1433356 0.1981986
##
## $`Mean Successful Assignment by Number of PCs of PCA`
## 25 26 27 28 29 30 31 32
## 0.8285714 0.8081633 0.7989796 0.7928571 0.7765306 0.8530612 0.7938776 0.8724490
## 33 34 35 36 37 38 39 40
## 0.8806122 0.8663265 0.8326531 0.8918367 0.8387755 0.8448980 0.8173469 0.8928571
## 41 42 43 44 45 46 47 48
## 0.8459184 0.8377551 0.8959184 0.8642857 0.8632653 0.8520408 0.8336735 0.8806122
## 49 50 51 52 53 54 55
## 0.8418367 0.8479592 0.8602041 0.9367347 0.8346939 0.8469388 0.8336735
##
## $`Number of PCs Achieving Highest Mean Success`
## [1] "52"
##
## $`Root Mean Squared Error by Number of PCs of PCA`
## 25 26 27 28 29 30 31 32
## 0.1999583 0.2191860 0.2244666 0.2364839 0.2317697 0.2013594 0.2514846 0.1660164
## 33 34 35 36 37 38 39 40
## 0.1417229 0.1592948 0.2025966 0.1372060 0.1922596 0.1864306 0.2146498 0.1406165
## 41 42 43 44 45 46 47 48
## 0.1920700 0.2010230 0.1267937 0.1567913 0.1682281 0.2015920 0.2180190 0.1462783
## 49 50 51 52 53 54 55
## 0.1721135 0.1846065 0.1791100 0.1069240 0.2158833 0.2068184 0.2150860
##
## $`Number of PCs Achieving Lowest MSE`
## [1] "52"
dapc_result_pop_dataset <- adegenet::dapc(
pop_dataset_DAPC, pop = adegenet::pop(pop_dataset_DAPC),
n.pca = as.numeric(optimal_pcs_pop_dataset_DAPC_run2),
n.da = length(levels(pop_dataset_DAPC$pop)) - 1) #perform DAPC with optimal PCs (determined by crossvalidation)
save(xval_results_pop_dataset_DAPC_run1, optimal_pcs_pop_dataset_DAPC_run1,
xval_results_pop_dataset_DAPC_run2, optimal_pcs_pop_dataset_DAPC_run2,
dapc_result_pop_dataset,
file = "DAPC_results_pop_dataset.Rdata")
load("DAPC_results_pop_dataset.Rdata")
print(round(dapc_result_pop_dataset$var, 2) * 100) #conserved variance in percent
## [1] 87
barplot(dapc_result_pop_dataset$eig, names.arg = paste0("LD", seq_along(dapc_result_pop_dataset$eig)), #plot eigenvalues of discriminant functions
xlab = "Linear discriminants", ylab = "Eigenvalues") #
round(dapc_result_pop_dataset$eig, 1) #show eigenvalues of discriminant functions
## [1] 953.8 261.1 116.6 83.0 63.9 25.2
dapc_relative_LDs <- round(dapc_result_pop_dataset$eig / sum(dapc_result_pop_dataset$eig) * 100, 1)
barplot(dapc_relative_LDs, #plot relative proportions of eigenvalues of discriminant functions
names.arg = paste0("LD", seq_along(dapc_result_pop_dataset$eig)),
xlab = "Linear discriminants", ylab = "Relative proportion of eigenvalues [%]")
LD1_label <- paste("LD1 (", dapc_relative_LDs[1], "%)", sep = "") #create axis labels for LD1 showing relative proportions of eigenvalues of discriminant functions in percent
LD2_label <- paste("LD2 (", dapc_relative_LDs[2], "%)", sep = "") #create axis labels for LD2 showing relative proportions of eigenvalues of discriminant functions in percent
LD3_label <- paste("LD3 (", dapc_relative_LDs[3], "%)", sep = "") #create axis labels for LD3 showing relative proportions of eigenvalues of discriminant functions in percent
dapc_df_pop_dataset <- data.frame(dapc_result_pop_dataset$ind.coord, group = factor(dapc_result_pop_dataset$grp, levels = levels(population_assignment))) #create dataframe for visualization
svg("DAPC_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC) +
scale_fill_manual(values = cols_pop_DAPC) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD2_label) +
theme_classic() +
guides(override.aes = list(alpha = 1))
dev.off()
svg("DAPC_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot of LD1 vs LD3
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC) +
scale_fill_manual(values = cols_pop_DAPC) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD3_label) +
theme_classic() +
guides(override.aes = list(alpha = 1))
dev.off()
svg("DAPC_densi_LD1_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD1, fill = group)) + #density plot for LD1
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("DAPC_densi_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD2, fill = group)) + #density plot for LD2
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
svg("DAPC_densi_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_df_pop_dataset, aes(x = LD3, fill = group)) + #density plot for LD3
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
dev.off()
plotly::plot_ly( #3D plot of DAPC using LD1-LD3
data = dapc_df_pop_dataset,
x = ~LD1, y = ~LD2, z = ~LD3,
color = ~group, colors = cols_pop_DAPC,
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, #marker transparency
marker = list(size = point_size_3D_plot)) #marker size
Plot the DAPC with scaled LD axes according to proportion of variance explained by each respective eigenvalue to ensure that distances between points in 3D plot reflect each discriminant function’s contribution
eig_proportions_dapc <- dapc_result_pop_dataset$eig / sum(dapc_result_pop_dataset$eig) #calculate relative proportions of eigenvalues
dapc_df_pop_dataset_scaled <- dapc_df_pop_dataset
dapc_df_pop_dataset_scaled$LD1 <- dapc_df_pop_dataset_scaled$LD1 * sqrt(eig_proportions_dapc[1]) #scale LD1
dapc_df_pop_dataset_scaled$LD2 <- dapc_df_pop_dataset_scaled$LD2 * sqrt(eig_proportions_dapc[2]) #scale LD2
dapc_df_pop_dataset_scaled$LD3 <- dapc_df_pop_dataset_scaled$LD3 * sqrt(eig_proportions_dapc[3]) #scale LD3
ggplot(dapc_df_pop_dataset_scaled, aes(x = LD1, y = LD2)) + #DAPC scaled scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC) +
scale_fill_manual(values = cols_pop_DAPC) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD2 (scaled)")) +
theme_classic() +
xlim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2)))) +
ylim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2)))) +
guides(override.aes = list(alpha = 1))
ggplot(dapc_df_pop_dataset_scaled, aes(x = LD1, y = LD3)) + #DAPC scaled scatterplot of LD1 vs LD3
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC) +
scale_fill_manual(values = cols_pop_DAPC) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD3 (scaled)")) +
theme_classic() +
xlim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD3)))) +
ylim(range(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD3)))) +
guides(override.aes = list(alpha = 1))
plotly::plot_ly( #3D plot of DAPC_kmeans using scaled LD1-LD3
data = dapc_df_pop_dataset_scaled,
x = ~LD1, y = ~LD2, z = ~LD3,
color = ~group, colors = cols_pop_DAPC,
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, marker = list(size = point_size_3D_plot)
) %>% layout(scene = list(xaxis = list(title = "LD1", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))))),
yaxis = list(title = "LD2", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))))),
zaxis = list(title = "LD3", range = c(min(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3))), max(c(range(dapc_df_pop_dataset_scaled$LD1), range(dapc_df_pop_dataset_scaled$LD2), range(dapc_df_pop_dataset_scaled$LD3)))))))
dapc_group_memberships <- as.data.frame(dapc_result_pop_dataset$posterior) #convert posterior probabilities to data frame
dapc_group_memberships$ID <- row.names(dapc_group_memberships) #add ID column
dapc_group_memberships$Population <- pop_dataset$pop[match(dapc_group_memberships$ID, indNames(pop_dataset))] #match populations to IDs
dapc_group_memberships_long <- melt(dapc_group_memberships, id.vars = c("ID", "Population"), variable.name = "Cluster", value.name = "Probability") #reshape to long format
svg("DAPC_membership_probabilities.svg", width = 30/2.56, height = 25/2.56)
ggplot(dapc_group_memberships_long, aes(x = Probability, y = ID, fill = Cluster)) + #create Structure-like plots
geom_bar(stat = "identity") +
theme_classic() +
scale_fill_manual(values = cols_pop_DAPC) + #use viridis color palette
facet_wrap(~ Population, scales = "free_y") + #split y-axis by population
labs(fill = "Cluster") +
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 4), #modify size of y-axis labels
axis.text.x = element_blank(), #remove x-axis labels
axis.title.y = element_blank(), #remove y-axis title
axis.title.x = element_blank(), #remove x-axis title
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.background = element_blank())
dev.off()
dapc_loading_LD1 <- adegenet::loadingplot(dapc_result_pop_dataset$var.contr[, 1], lab.jitter = 100, #plot loading for first LD axis
thres = quantile(dapc_result_pop_dataset$var.contr[, 1], 0.99)) #set threshold to 99th percentile (top 1% of loadings)
dapc_loading_LD2 <- adegenet::loadingplot(dapc_result_pop_dataset$var.contr[, 2], lab.jitter = 100, #plot loading for second LD axis
thres = quantile(dapc_result_pop_dataset$var.contr[, 2], 0.99)) #set threshold to 99th percentile (top 1% of loadings)
dapc_loading_LD1$var.names #print most differentiated alleles for LD1
## [1] "ctg00000001_1_12521730.0" "ctg00000001_1_12521730.1"
## [3] "ctg00000003_1_4110250.0" "ctg00000003_1_4110250.1"
## [5] "ctg00000005_1_16585380.0" "ctg00000005_1_16585380.1"
dapc_loading_LD2$var.names #print most differentiated alleles for LD2
## [1] "ctg00000018_1_4211457.0" "ctg00000018_1_4211457.1"
## [3] "ctg00000018_1_10958111.0" "ctg00000018_1_10958111.1"
## [5] "ctg00000018_1_12472987.0" "ctg00000018_1_12472987.1"
intersect(dapc_loading_LD1$var.names, dapc_loading_LD2$var.names) #show differentiated alleles for both LD1 and LD2
## character(0)
pop_dataset_DAPC_loci <- adegenet::seploc(pop_dataset_DAPC) #separate loci
snp_names <- names(pop_dataset_DAPC_loci) #extract loci names
snp_data <- lapply(snp_names, function(snp) { #separate loci and process SNPs
if (snp %in% names(pop_dataset_DAPC_loci)) {
adegenet::tab(pop_dataset_DAPC_loci[[snp]]) #process SNP if exists
} else {NULL}}) #return NULL if SNP does not exist
snp_frequencies <- lapply(snp_data, function(data) { #calculate allele frequencies per population
if (!is.null(data)) {
apply(data, 2, function(e) tapply(e, adegenet::pop(pop_dataset_DAPC), mean, na.rm = T)) #calculate frequencies
} else {NULL}}) #return NULL for non-existent data
for (i in seq_along(snp_frequencies)) { #print allele frequencies of differentiated SNPs
if (!is.null(snp_frequencies[[i]])) {
cat("Allele frequencies for SNP:", snp_names[i], "\n")
print(snp_frequencies[[i]])
} else {cat("No data for SNP:", snp_names[i], "\n")}}
## Allele frequencies for SNP: Chromosome24_358443
## Chromosome24_358443.1 Chromosome24_358443.0
## H.maia 0.62666667 1.3733333
## H.latifascia 0.40000000 1.6000000
## H.lucina 0.40000000 1.6000000
## H.nevadensis 0.09090909 1.9090909
## H.sp.GreatLakesComplex 0.60000000 1.4000000
## H.slosseri 1.10526316 0.8947368
## H.peigleri 0.41666667 1.5833333
## Allele frequencies for SNP: Chromosome24_358452
## Chromosome24_358452.0 Chromosome24_358452.1
## H.maia 1.960000 0.04000000
## H.latifascia 1.933333 0.06666667
## H.lucina 2.000000 0.00000000
## H.nevadensis 2.000000 0.00000000
## H.sp.GreatLakesComplex 1.900000 0.10000000
## H.slosseri 2.000000 0.00000000
## H.peigleri 2.000000 0.00000000
## Allele frequencies for SNP: Chromosome24_358458
## Chromosome24_358458.0 Chromosome24_358458.1
## H.maia 1.6533333 0.3466667
## H.latifascia 1.2666667 0.7333333
## H.lucina 1.1000000 0.9000000
## H.nevadensis 0.5454545 1.4545455
## H.sp.GreatLakesComplex 1.4000000 0.6000000
## H.slosseri 1.4736842 0.5263158
## H.peigleri 1.6666667 0.3333333
## Allele frequencies for SNP: Chromosome24_358474
## Chromosome24_358474.0 Chromosome24_358474.1
## H.maia 2.0 0.0
## H.latifascia 2.0 0.0
## H.lucina 2.0 0.0
## H.nevadensis 2.0 0.0
## H.sp.GreatLakesComplex 1.7 0.3
## H.slosseri 2.0 0.0
## H.peigleri 2.0 0.0
## Allele frequencies for SNP: Chromosome24_358511
## Chromosome24_358511.0 Chromosome24_358511.1
## H.maia 2.0 0.0
## H.latifascia 1.8 0.2
## H.lucina 2.0 0.0
## H.nevadensis 2.0 0.0
## H.sp.GreatLakesComplex 2.0 0.0
## H.slosseri 2.0 0.0
## H.peigleri 2.0 0.0
dapc_group_memberships <- as.data.frame(dapc_result_pop_dataset$posterior) #convert posterior probabilities to data frame
subset_size <- 40 #define subset size
plot_assignments <- function(dapc_result, subset_size, max_rows) { #plot group memberships (heat colors represent membership probabilities with red=1 and white=0, blue crosses represent prior Cluster_assignment provided to DAPC)
num_plots <- ceiling(max_rows / subset_size) #calculate number of plots needed
for (i in 0:(num_plots - 1)) {
start_row <- i * subset_size + 1 #start index for subset
end_row <- min((i + 1) * subset_size, max_rows) #end index for subset
cat("Plotting rows", start_row, "to", end_row, "\n") #print status
adegenet::assignplot(dapc_result, subset = start_row:end_row)}} #plot assignments for current subset
plot_assignments(dapc_result_pop_dataset, subset_size, nrow(dapc_group_memberships)) #plot all subsets
## Plotting rows 1 to 40
## Plotting rows 41 to 80
## Plotting rows 81 to 120
## Plotting rows 121 to 160
## Plotting rows 161 to 163
dapc_result_pop_dataset$grp <- factor(dapc_result_pop_dataset$grp, levels = unique(pop_dataset_DAPC$pop))
admixed_individuals <- which(apply(dapc_result_pop_dataset$posterior, 1, function(e) all(e < 0.80))) #identify admixed individuals (posterior probabilities less than 0.80)
admixed_individuals #show admixed individuals
## H.slosseri_TX_Hmg028 H.sp.GreatLakesComplex_MI_DR89
## 52 149
if (length(admixed_individuals) > 0) {
adegenet::compoplot(dapc_result_pop_dataset, col.pal = cols_pop_DAPC, subset = admixed_individuals)}
threshold_high <- 0.75 #define higher threshold (probabilities above this value will assign individual to single Cluster_assignment)
threshold_low <- 1 - threshold_high #define lower threshold (individuals with probabilities above this value but below higher threshold will be assigned to multiple clusters)
admixed_individuals_cluster <- "High admixture" #set name of admixed individuals (remaining individuals belonging to multiple clusters)
dapc_result_pop_dataset_posterior <- as.data.frame(dapc_result_pop_dataset$posterior)
dapc_result_pop_dataset_posterior[sapply(dapc_result_pop_dataset_posterior, is.numeric)] <- round(dapc_result_pop_dataset_posterior[sapply(dapc_result_pop_dataset_posterior, is.numeric)], 2) #round numeric columns
dapc_result_pop_dataset_posterior$ID <- rownames(dapc_result_pop_dataset_posterior) #add ID column
assign_clusters <- function(probs) {
if (any(probs > threshold_high)) {
assigned_cluster <- names(probs)[which.max(probs)]
} else if (any(probs > threshold_low)) {
shared_clusters <- names(probs)[probs > threshold_low]
if (length(shared_clusters) > 3) {
assigned_cluster <- admixed_individuals_cluster
} else {assigned_cluster <- paste(shared_clusters, collapse = "+")}
} else {assigned_cluster <- admixed_individuals_cluster}
return(assigned_cluster)}
dapc_result_pop_dataset_posterior$Assigned_cluster <- apply(dapc_result_pop_dataset_posterior[1:length(unique(dapc_df_pop_dataset$group))], 1, assign_clusters) #apply function to each individual
dapc_final_dataset <- left_join(dataset, dapc_result_pop_dataset_posterior, by = "ID") #combine datasets
DAPC_map_point_jitter <- 0 #set degree of jitter (to avoid overlapping points)
DAPC_map_point_alpha <- 0.8 #set transparency of points
DAPC_map_point_outline_color <- "black" #modify color of point outlines
DAPC_map_point_size <- 4 #set size of points
DAPC_map_color_palette <- viridis::mako #set colorblind-friendly viridis color palette
DAPC_kmeans_map_color_palette <- viridis::viridis #set colorblind-friendly viridis color palette
DAPC_map_buffer_percentage <- 0.2 #modify buffer around range of coordinates for map
DAPC_map_state_border_color <- "gray30" #modify state border color for map
DAPC_map_state_border_thickness <- 0.3 #modify state border thickness for map
DAPC_map_country_border_color <- "gray30" #modify country border color for map
DAPC_map_country_border_thickness <- 0.8 #modify country border thickness for map
DAPC_map_width <- 25/2.54 #modify width of plot for map
DAPC_map_height <- 15/2.54 #modify height of plot for map
DAPC_map_filling_color <- "lightgrey" #modify map filling color
dapc_dataset_map <- dapc_final_dataset %>% tidyr::drop_na() #remove empty rows for plotting
dapc_dataset_map$Longitude <- as.numeric(dapc_dataset_map$Longitude) #ensure longitude is numeric
dapc_dataset_map$Latitude <- as.numeric(dapc_dataset_map$Latitude) #ensure latitude is numeric
xlim_range <- range(dapc_dataset_map$Longitude, na.rm = T) #calculate longitude range
ylim_range <- range(dapc_dataset_map$Latitude, na.rm = T) #calculate latitude range
x_buffer <- diff(xlim_range) * DAPC_map_buffer_percentage #calculate longitude buffer
y_buffer <- diff(ylim_range) * DAPC_map_buffer_percentage #calculate latitude buffer
xlim_range <- c(xlim_range[1] - x_buffer, xlim_range[2] + x_buffer) #apply longitude buffer
ylim_range <- c(ylim_range[1] - y_buffer, ylim_range[2] + y_buffer) #apply latitude buffer
dapc_dataset_map$Assigned_cluster <- factor(dapc_dataset_map$Assigned_cluster, levels = c(unique(dapc_dataset_map$Assigned_cluster) %>% .[!stringr::str_detect(., "\\+")] %>% sort(), unique(dapc_dataset_map$Assigned_cluster) %>% .[stringr::str_detect(., "\\+")] %>% sort(), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster
svg("DAPC_cluster_map.svg", width = DAPC_map_width, height = DAPC_map_height)
map_DAPC_cluster <- ggplot(data = dapc_dataset_map, aes(x = Longitude, y = Latitude)) +
theme_void() + #use theme_void for no background
borders("world", colour = DAPC_map_country_border_color, fill = DAPC_map_filling_color,
linewidth = DAPC_map_country_border_thickness) +
geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), #add state borders (only for USA!)
fill = NA, colour = DAPC_map_state_border_color,
linewidth = DAPC_map_state_border_thickness) +
geom_point(aes(fill = Assigned_cluster, shape = Assigned_cluster),
alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color,
size = DAPC_map_point_size) +
geom_jitter(aes(fill = Assigned_cluster, shape = Assigned_cluster),
width = DAPC_map_point_jitter, height = DAPC_map_point_jitter,
alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color,
size = DAPC_map_point_size) +
coord_fixed(xlim = xlim_range, ylim = ylim_range) + #set coordinates with buffer
scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_dataset_map$Assigned_cluster)), begin = 0, end = 1)) + #assign colors
scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(unique(dapc_dataset_map$Assigned_cluster)))) + #assign shapes
guides(fill = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1)),
shape = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1))) +
labs(fill = "Cluster", shape = "Cluster") #add labels
suppressWarnings(print(map_DAPC_cluster)) #print map and suppress warnings
dev.off() #close svg device
names(dapc_df_pop_dataset)[names(dapc_df_pop_dataset) == "group"] <- "Cluster"
dapc_df_pop_dataset$ID <- row.names(dapc_df_pop_dataset)
dapc_final_dataset <- left_join(dapc_df_pop_dataset, dapc_final_dataset, by = "ID")
dapc_final_dataset$Assigned_cluster <- factor(dapc_final_dataset$Assigned_cluster, levels = c(unique(dapc_final_dataset$Assigned_cluster) %>% .[!stringr::str_detect(., "\\+")] %>% sort(), unique(dapc_final_dataset$Assigned_cluster) %>% .[stringr::str_detect(., "\\+")] %>% sort(), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster
write.table(dapc_final_dataset[order(dapc_final_dataset$ID), ], #sort by ID
file = "DAPC_final_dataset_assignment_probabilities_clusters",
sep = "\t", dec = ",", row.names = F, col.names = T, quote = F)
svg("DAPC_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_final_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha,
aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(unique(dapc_final_dataset$Assigned_cluster)))) +
scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
labs(shape = population_assignment_name, fill = population_assignment_name) +
theme_classic()
dev.off()
svg("DAPC_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_final_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot
geom_point(size = point_size, alpha = point_alpha,
aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(unique(dapc_final_dataset$Assigned_cluster)))) +
scale_fill_manual(values = DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
labs(shape = population_assignment_name, fill = population_assignment_name) +
theme_classic()
dev.off()
plotly::plot_ly( #3D plot of DAPC using LD1-LD3
data = dapc_final_dataset,
x = ~LD1,
y = ~LD2,
z = ~LD3,
color = ~Assigned_cluster,
colors = (DAPC_map_color_palette(n = length(unique(dapc_final_dataset$Assigned_cluster)), begin = 0, end = 1)),
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, #marker transparency
marker = list(size = point_size_3D_plot)) #marker size
clusters_kmeans_pop_dataset_DAPC <- find.clusters(pop_dataset_DAPC, #apply k-means clustering to identify grouping
n.iter = 100000, #number of iterations per run
n.start = 100, #number of randomly chosen starting centroids
max.n.clusters = 50, #evaluate up to k clusters
n.pca = length(indNames(pop_dataset_DAPC))) #retain all PCs
round(sort(clusters_kmeans_pop_dataset_DAPC$Kstat), 2) #BIC for each k sorted
clusters_assignment_pop_dataset_DAPC <- as.data.frame(clusters_kmeans_pop_dataset_DAPC$grp) #assignment of each individual to the chosen number of clusters
colnames(clusters_assignment_pop_dataset_DAPC)[1] <- "Cluster_ID" #rename first column
clusters_kmeans_pop_dataset_DAPC$size #number of individuals assigned to each Cluster_assignment
## [1] 13 23 30 9 11 13 64
pop_dataset_DAPC@tab <- matrix(
as.integer(round(apply(pop_dataset_DAPC@tab, 2, function(x) {
x[is.na(x)] <- mean(x, na.rm = T) #replace NA with column means
return(x)}))), #return modified column
nrow = nrow(pop_dataset_DAPC@tab), #number of rows
ncol = ncol(pop_dataset_DAPC@tab), #number of columns
dimnames = dimnames(pop_dataset_DAPC@tab)) #preserve original dimensions
xval_results_pop_dataset_DAPC_kmeans_run1 <- adegenet::xvalDapc( #run1
pop_dataset_DAPC@tab, #genetic data matrix (allele frequencies)
clusters_assignment_pop_dataset_DAPC$Cluster_ID, #population assignment (grouping factor) of individuals using Cluster_assignment IDs
training.set = 0.9, #use 90% of data for training during cross-validation
n.pca.max = ceiling(min(nrow(pop_dataset_DAPC@tab), ncol(pop_dataset_DAPC@tab)) * 0.6), #set maximum number of PCs to evaluate in cross-validation to 60% of total numbers of PCs
result = "groupMean", #use group means to calculate the cross-validation result
center = T, #center data before performing PCA
scale = F, #do not scale data (i.e., do not standardize allele frequencies)
n.rep = crossval_N, #set number of cross-validation replicates
xval.plot = T) #plot cross-validation results (mean squared error (MSE) vs. number of PCs)
optimal_pcs_pop_dataset_DAPC_kmeans_run1 <- xval_results_pop_dataset_DAPC_kmeans_run1[["Number of PCs Achieving Lowest MSE"]] #extract optimal PCs
print(optimal_pcs_pop_dataset_DAPC_kmeans_run1) #print optimal PCs
## [1] "20"
xval_results_pop_dataset_DAPC_kmeans_run2 <- adegenet::xvalDapc( #run2
pop_dataset_DAPC@tab, clusters_assignment_pop_dataset_DAPC$Cluster_ID, training.set = 0.9,
result = "groupMean", center = T, scale = F, n.rep = crossval_N, xval.plot = T,
n.pca = seq(as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run1) - 15,
as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run1) + 15, by = 1)) #refine PCs range
optimal_pcs_pop_dataset_DAPC_kmeans_run2 <- xval_results_pop_dataset_DAPC_kmeans_run2[["Number of PCs Achieving Lowest MSE"]] #extract refined optimal PCs
print(xval_results_pop_dataset_DAPC_kmeans_run2[2:6]) #print cross-validation results
## $`Median and Confidence Interval for Random Chance`
## 2.5% 50% 97.5%
## 0.0922420 0.1393075 0.1962677
##
## $`Mean Successful Assignment by Number of PCs of PCA`
## 5 6 7 8 9 10 11 12
## 0.9265306 0.9557823 0.9571429 0.9469388 0.9748299 0.9680272 0.9646259 0.9551020
## 13 14 15 16 17 18 19 20
## 0.9748299 0.9721088 0.9809524 0.9836735 0.9789116 0.9836735 0.9741497 0.9646259
## 21 22 23 24 25 26 27 28
## 0.9857143 0.9857143 0.9836735 0.9836735 0.9836735 0.9904762 0.9714286 0.9646259
## 29 30 31 32 33 34 35
## 0.9551020 0.9482993 0.9789116 0.9455782 0.9761905 0.9666667 0.9673469
##
## $`Number of PCs Achieving Highest Mean Success`
## [1] "26"
##
## $`Root Mean Squared Error by Number of PCs of PCA`
## 5 6 7 8 9 10 11
## 0.07780124 0.05334826 0.05429407 0.06877144 0.04098604 0.05095234 0.04563404
## 12 13 14 15 16 17 18
## 0.06242215 0.03501915 0.04537981 0.03688556 0.03428463 0.03744587 0.03027020
## 19 20 21 22 23 24 25
## 0.03428463 0.04771613 0.02608203 0.02608203 0.02686860 0.02686860 0.02686860
## 26 27 28 29 30 31 32
## 0.02129589 0.05634362 0.05467628 0.07250565 0.07076138 0.03080063 0.06595483
## 33 34 35
## 0.03367175 0.04517540 0.04933732
##
## $`Number of PCs Achieving Lowest MSE`
## [1] "26"
dapc_result_pop_dataset_kmeans <- dapc(pop_dataset_DAPC, pop = clusters_assignment_pop_dataset_DAPC$Cluster_ID,
n.pca = as.numeric(optimal_pcs_pop_dataset_DAPC_kmeans_run2), #optimal number of PCs from cross-validation
n.da = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID)) - 1) #number of discriminant functions (clusters - 1)
save(xval_results_pop_dataset_DAPC_kmeans_run1, optimal_pcs_pop_dataset_DAPC_kmeans_run1,
xval_results_pop_dataset_DAPC_kmeans_run2, optimal_pcs_pop_dataset_DAPC_kmeans_run2,
dapc_result_pop_dataset_kmeans,
file = "DAPC_results_pop_dataset_kmeans.Rdata")
load("DAPC_results_pop_dataset_kmeans.Rdata")
print(round(dapc_result_pop_dataset_kmeans$var, 2) * 100) #conserved variance in percent
## [1] 70
barplot(dapc_result_pop_dataset_kmeans$eig, names.arg = paste0("LD", seq_along(dapc_result_pop_dataset_kmeans$eig)), #plot eigenvalues of discriminant functions
xlab = "Linear discriminants", ylab = "Eigenvalues")
round(dapc_result_pop_dataset_kmeans$eig, 1) #show eigenvalues of discriminant functions
## [1] 796.8 523.4 225.1 87.3 77.2 52.5
dapc_kmeans_relative_LDs <- round(dapc_result_pop_dataset_kmeans$eig / sum(dapc_result_pop_dataset_kmeans$eig) * 100, 1)
barplot(dapc_kmeans_relative_LDs, #plot relative proportions of eigenvalues of discriminant functions
names.arg = paste0("LD", seq_along(dapc_result_pop_dataset_kmeans$eig)),
xlab = "Linear discriminants", ylab = "Relative proportion of eigenvalues [%]")
cols_pop_DAPC_kmeans <- viridis::viridis(n = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID)), begin = 0, end = 1) #define color palette
shapes_pop_DAPC_kmeans <- rep(c(21, 24, 22, 25), length.out = length(unique(clusters_assignment_pop_dataset_DAPC$Cluster_ID))) #define shape palette
dapc_kmeans_df_pop_dataset <- data.frame(dapc_result_pop_dataset_kmeans$ind.coord, group = dapc_result_pop_dataset_kmeans$grp) #create dataframe for visualization
LD1_label <- paste("LD1 (", dapc_kmeans_relative_LDs[1], "%)", sep = "") #create axis labels for LD1 showing relative proportions of eigenvalues of discriminant functions in percent
LD2_label <- paste("LD2 (", dapc_kmeans_relative_LDs[2], "%)", sep = "") #create axis labels for LD2 showing relative proportions of eigenvalues of discriminant functions in percent
LD3_label <- paste("LD3 (", dapc_kmeans_relative_LDs[3], "%)", sep = "") #create axis labels for LD3 showing relative proportions of eigenvalues of discriminant functions in percent
svg("DAPC_kmeans_LD1_LD2_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD2)) + #kmeans DAPC scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD2_label) +
theme_classic() +
guides(override.aes = list(alpha = 1)) +
coord_equal()
dev.off()
svg("DAPC_kmeans_LD1_LD3_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD3)) + #kmeans DAPC scatterplot of LD1 vs LD3
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = LD1_label, y = LD3_label) +
theme_classic() +
guides(override.aes = list(alpha = 1))
dev.off()
svg("DAPC_kmeans_densi_LD1_pop_dataset.svg", width = plot_width, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, fill = group)) + #density plot for LD1
geom_density(alpha = density_plot_alpha) +
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
coord_flip()
dev.off()
svg("DAPC_kmeans_densi_LD2_pop_dataset.svg", width = plot_height, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD2, fill = group)) + #density plot for LD2
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
coord_flip()
dev.off()
svg("DAPC_kmeans_densi_LD3_pop_dataset.svg", width = plot_height, height = plot_height)
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD3, fill = group)) + #density plot for LD3
geom_density(alpha = density_plot_alpha) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
theme_void() +
theme(legend.position = "none", axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
coord_flip()
dev.off()
It is then possible to combine the DAPC plots with the density plots (e.g., using Power Point)
plotly::plot_ly( #3D plot of DAPC_kmeans using LD1-LD3
data = dapc_kmeans_df_pop_dataset,
x = ~LD1, y = ~LD2, z = ~LD3,
color = ~group, colors = cols_pop_DAPC_kmeans,
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, #marker transparency
marker = list(size = point_size_3D_plot)) #marker size
Plot the DAPC with scaled LD axes according to proportion of variance explained by each respective eigenvalue to ensure that distances between points in 3D plot reflect each discriminant function’s contribution
eig_proportions_dapc_kmeans <- dapc_result_pop_dataset_kmeans$eig / sum(dapc_result_pop_dataset_kmeans$eig) #calculate relative proportions of eigenvalues
dapc_kmeans_df_pop_dataset_scaled <- dapc_df_pop_dataset
dapc_kmeans_df_pop_dataset_scaled$LD1 <- dapc_kmeans_df_pop_dataset_scaled$LD1 * sqrt(eig_proportions_dapc_kmeans[1]) #scale LD1
dapc_kmeans_df_pop_dataset_scaled$LD2 <- dapc_kmeans_df_pop_dataset_scaled$LD2 * sqrt(eig_proportions_dapc_kmeans[2]) #scale LD2
dapc_kmeans_df_pop_dataset_scaled$LD3 <- dapc_kmeans_df_pop_dataset_scaled$LD3 * sqrt(eig_proportions_dapc_kmeans[3]) #scale LD3
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD2)) + #kmeans DAPC scaled scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD2 (scaled)")) +
theme_classic() +
coord_equal() +
guides(override.aes = list(alpha = 1))
ggplot(dapc_kmeans_df_pop_dataset, aes(x = LD1, y = LD3)) + #kmeans DAPC scaled scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha, aes(shape = group, fill = group), color = point_outline_color) +
scale_shape_manual(values = shapes_pop_DAPC_kmeans) +
scale_fill_manual(values = cols_pop_DAPC_kmeans) +
labs(shape = population_assignment_name, fill = population_assignment_name, x = paste("LD1 (scaled)"), y = paste("LD3 (scaled)")) +
theme_classic() +
coord_equal() +
guides(override.aes = list(alpha = 1))
plotly::plot_ly( #3D plot of DAPC_kmeans using scaled LD1-LD3
data = dapc_kmeans_df_pop_dataset_scaled,
x = ~LD1, y = ~LD2, z = ~LD3,
color = ~Cluster, colors = cols_pop_DAPC_kmeans,
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, marker = list(size = point_size_3D_plot)
) %>% layout(scene = list(xaxis = list(title = "LD1", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))))),
yaxis = list(title = "LD2", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))))),
zaxis = list(title = "LD3", range = c(min(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3))), max(c(range(dapc_kmeans_df_pop_dataset_scaled$LD1), range(dapc_kmeans_df_pop_dataset_scaled$LD2), range(dapc_kmeans_df_pop_dataset_scaled$LD3)))))))
dapc_kmeans_group_memberships <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior) #convert posterior probabilities to data frame
dapc_kmeans_group_memberships$ID <- row.names(dapc_kmeans_group_memberships) #add ID column
dapc_kmeans_group_memberships$Population <- pop_dataset$pop[match(dapc_kmeans_group_memberships$ID, indNames(pop_dataset))] #match populations to IDs
dapc_kmeans_group_memberships_long <- melt(dapc_kmeans_group_memberships, id.vars = c("ID", "Population"), variable.name = "Cluster", value.name = "Probability") #reshape to long format
svg("DAPC_kmeans_Membership_probabilities.svg", width = 25/2.56, height = 25/2.56)
ggplot(dapc_kmeans_group_memberships_long, aes(x = Probability, y = ID, fill = Cluster)) + #create Structure-like plots
geom_bar(stat = "identity") +
theme_classic() +
scale_fill_manual(values = cols_pop_DAPC_kmeans) + #use viridis color palette
facet_wrap(~ Population, scales = "free_y") + #split y-axis by population
labs(fill = "Cluster") +
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 3), axis.text.x = element_blank(),
axis.title.y = element_blank(), axis.title.x = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), panel.background = element_blank(), plot.background = element_blank())
dev.off()
##### Assess allele differentiation
dapc_kmeans_loading_LD1 <- adegenet::loadingplot(dapc_result_pop_dataset_kmeans$var.contr[, 1], lab.jitter = 100, #plot loading for first LD axis
thres = quantile(dapc_result_pop_dataset_kmeans$var.contr[, 1], 0.99)) #set threshold to 99th percentile (top 1% of loadings)
dapc_kmeans_loading_LD2 <- adegenet::loadingplot(dapc_result_pop_dataset_kmeans$var.contr[, 2], lab.jitter = 100, #plot loading for second LD axis
thres = quantile(dapc_result_pop_dataset_kmeans$var.contr[, 2], 0.99)) #set threshold to 99th percentile (top 1% of loadings)
dapc_kmeans_loading_LD1$var.names #print most differentiated alleles for LD1
## [1] "ctg00000001_1_12521730.0" "ctg00000001_1_12521730.1"
## [3] "ctg00000003_1_4110250.0" "ctg00000005_1_16585380.1"
## [5] "ctg00000012_1_13095659.0" "ctg00000012_1_13095659.1"
dapc_kmeans_loading_LD2$var.names #print most differentiated alleles for LD2
## [1] "ctg00000001_1_12416200.0" "ctg00000001_1_12416200.1"
## [3] "ctg00000003_1_2808894.0" "ctg00000003_1_2808894.1"
## [5] "ctg00000005_1_16183223.0" "ctg00000005_1_16183223.1"
intersect(dapc_kmeans_loading_LD1$var.names, dapc_kmeans_loading_LD2$var.names) #show differentiated alleles for both LD1 and LD2
## character(0)
dapc_kmeans_group_memberships <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior) #convert posterior probabilities to data frame
subset_size <- 40 #define subset size
plot_assignments <- function(dapc_kmeans_result, subset_size, max_rows) { #plot group memberships (heat colors represent membership probabilities with red=1 and white=0, blue crosses represent prior Cluster_assignment provided to DAPC kmeans)
num_plots <- ceiling(max_rows / subset_size) #calculate number of plots needed
for (i in 0:(num_plots - 1)) {
start_row <- i * subset_size + 1 #start index for subset
end_row <- min((i + 1) * subset_size, max_rows) #end index for subset
cat("Plotting rows", start_row, "to", end_row, "\n") #print status
adegenet::assignplot(dapc_kmeans_result, subset = start_row:end_row)}} #plot assignments for current subset
plot_assignments(dapc_result_pop_dataset_kmeans, subset_size, nrow(dapc_kmeans_group_memberships)) #plot all subsets
## Plotting rows 1 to 40
## Plotting rows 41 to 80
## Plotting rows 81 to 120
## Plotting rows 121 to 160
## Plotting rows 161 to 163
dapc_result_pop_dataset_kmeans$grp <- factor(dapc_result_pop_dataset_kmeans$grp, levels = unique(dapc_result_pop_dataset_kmeans$grp))
admixed_individuals <- which(apply(dapc_result_pop_dataset_kmeans$posterior, 1, function(e) all(e < 0.80))) #identify admixed individuals (posterior probabilities less than 0.80)
admixed_individuals #show admixed individuals
## H.m.maia_NY_DR95
## 127
if (length(admixed_individuals) > 0) {adegenet::compoplot(dapc_result_pop_dataset_kmeans, col.pal = cols_pop_DAPC_kmeans, subset = admixed_individuals)} #plot admixed individuals
dapc_result_pop_dataset_kmeans_posterior <- as.data.frame(dapc_result_pop_dataset_kmeans$posterior)
dapc_result_pop_dataset_kmeans_posterior[sapply(dapc_result_pop_dataset_kmeans_posterior, is.numeric)] <- round(dapc_result_pop_dataset_kmeans_posterior[sapply(dapc_result_pop_dataset_kmeans_posterior, is.numeric)], 2) #round numeric columns
dapc_result_pop_dataset_kmeans_posterior$ID <- rownames(dapc_result_pop_dataset_kmeans_posterior) #add ID column
assign_clusters <- function(probs, threshold_high, threshold_low) { #define function to assign clusters
high_probs <- which(probs > threshold_high)
mid_probs <- which(probs > threshold_low & probs <= threshold_high)
if (length(high_probs) == 1) {
assigned_cluster <- as.character(high_probs) #single cluster assignment
} else if (length(mid_probs) >= 2 & length(mid_probs) <= 3) {
assigned_cluster <- paste(mid_probs, collapse = "+") #mixed cluster assignment
} else {assigned_cluster <- admixed_individuals_cluster} #admixed assignment
return(assigned_cluster)}
dapc_result_pop_dataset_kmeans_posterior$Assigned_cluster <- apply( #apply function to each cluster
dapc_result_pop_dataset_kmeans_posterior[, 1:length(unique(levels(dapc_result_pop_dataset_kmeans$grp)))], 1,
assign_clusters, threshold_high = threshold_high, threshold_low = threshold_low)
dapc_kmeans_final_dataset <- left_join(dataset, dapc_result_pop_dataset_kmeans_posterior, by = "ID") #combine datasets
dapc_kmeans_dataset_map <- dapc_kmeans_final_dataset %>% tidyr::drop_na() #remove empty rows for plotting
DAPC_kmeans_map_color_palette <- viridis::viridis #set colorblind-friendly color palette
dapc_kmeans_dataset_map$Assigned_cluster <- factor(dapc_kmeans_dataset_map$Assigned_cluster, levels = unique(c(sort(as.character(dapc_kmeans_dataset_map$Assigned_cluster[stringr::str_detect(dapc_kmeans_dataset_map$Assigned_cluster, "^[0-9]+$")])), sort(dapc_kmeans_dataset_map$Assigned_cluster[stringr::str_detect(dapc_kmeans_dataset_map$Assigned_cluster, "^[0-9]+(\\+[0-9]+){1,2}$")]), admixed_individuals_cluster)))#reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individual_cluster
dapc_kmeans_dataset_map$Assigned_cluster <- droplevels(dapc_kmeans_dataset_map$Assigned_cluster)
svg("DAPC_kmeans_cluster_map.svg", width = DAPC_map_width, height = DAPC_map_height)
map_DAPC_cluster <- ggplot(data = dapc_kmeans_dataset_map, aes(x = Longitude, y = Latitude)) +
theme_void() + #use theme_void for no background
borders("world", colour = DAPC_map_country_border_color, fill = DAPC_map_filling_color,
linewidth = DAPC_map_country_border_thickness) +
geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), #add state borders (only for USA!)
fill = NA, colour = DAPC_map_state_border_color,
linewidth = DAPC_map_state_border_thickness) +
geom_point(aes(fill = Assigned_cluster, shape = Assigned_cluster),
alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color,
size = DAPC_map_point_size) +
geom_jitter(aes(fill = Assigned_cluster, shape = Assigned_cluster),
width = DAPC_map_point_jitter, height = DAPC_map_point_jitter,
alpha = DAPC_map_point_alpha, color = DAPC_map_point_outline_color,
size = DAPC_map_point_size) +
coord_fixed(xlim = xlim_range, ylim = ylim_range) + #set coordinates with buffer
scale_fill_manual(values = DAPC_kmeans_map_color_palette(n = length(levels(unique(dapc_kmeans_dataset_map$Assigned_cluster))), begin = 0, end = 1)) + #assign colors
scale_shape_manual(values = rep(c(21, 24, 22, 25), length.out = length(levels(unique(dapc_kmeans_dataset_map$Assigned_cluster))))) + #assign shapes
guides(fill = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1)),
shape = guide_legend(override.aes = list(size = DAPC_map_point_size, alpha = 1))) +
labs(fill = "Cluster", shape = "Cluster") #add labels
suppressWarnings(print(map_DAPC_cluster)) #print map and suppress warnings
dev.off() #close svg device
names(dapc_kmeans_df_pop_dataset)[names(dapc_kmeans_df_pop_dataset) == "group"] <- "Cluster"
dapc_kmeans_df_pop_dataset$ID <- row.names(dapc_kmeans_df_pop_dataset)
dapc_kmeans_final_dataset <- left_join(dapc_kmeans_df_pop_dataset, dapc_kmeans_final_dataset, by = "ID")
write.table(dapc_kmeans_final_dataset[order(dapc_kmeans_final_dataset$ID), ], #sort by ID
file = "DAPC_kmeans_final_dataset_assignment_probabilities_clusters",
sep = "\t", dec = ",", row.names = F, col.names = T, quote = F)
dapc_kmeans_final_dataset <- droplevels(na.omit(dapc_kmeans_final_dataset)) #drop rows with NA values and remove unused factor levels
dapc_kmeans_final_dataset$Assigned_cluster <- factor(dapc_kmeans_final_dataset$Assigned_cluster, levels = c(unique(as.character(1:length(unique(dapc_kmeans_df_pop_dataset$Cluster)))), sort(unique(dapc_kmeans_final_dataset$Assigned_cluster[grep("\\+", dapc_kmeans_final_dataset$Assigned_cluster)])), admixed_individuals_cluster)) #reorder factor levels in dataset so that single clusters are first, followed by multiple clusters and admixed_individuals_cluster
ggplot(dapc_kmeans_final_dataset, aes(x = LD1, y = LD2)) + #DAPC scatterplot of LD1 vs LD2
geom_point(size = point_size, alpha = point_alpha,
aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)))) +
scale_fill_manual(values = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
labs(shape = population_assignment_name, fill = population_assignment_name) +
theme_classic()
ggplot(dapc_kmeans_final_dataset, aes(x = LD1, y = LD3)) + #DAPC scatterplot of LD1 vs LD3
geom_point(size = point_size, alpha = point_alpha,
aes(shape = Assigned_cluster, fill = Assigned_cluster), color = point_outline_color) +
scale_shape_manual(values = rep(c(21, 22, 24, 25), length.out = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)))) +
scale_fill_manual(values = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1)) +
labs(shape = population_assignment_name, fill = population_assignment_name) +
theme_classic()
plotly::plot_ly( #3D plot of DAPC using LD1-LD3
data = dapc_kmeans_final_dataset,
x = ~LD1, y = ~LD2, z = ~LD3,
color = ~Assigned_cluster,
colors = DAPC_map_color_palette(n = length(levels(dapc_kmeans_final_dataset$Assigned_cluster)), begin = 0, end = 1),
type = 'scatter3d', mode = 'markers',
alpha = point_alpha_3D_plot, #marker transparency
marker = list(size = point_size_3D_plot)) #marker size